This chunk took minutes
library(phyloseq)
library(ggord)
library(ggplot2)
library(viridis)
library(mgcViz)
library(dichromat)
library(ggpattern)
library(expss)
library(dplyr)
library(tidyverse)
library(GGally)
library(vegan)
library(ggpubr)
library(pheatmap)
library(lubridate)
library(ggsci)
library(RColorBrewer)
library(microbiome)
library(vegan)
library(lme4)
library(performance)
library(ggrepel)
library(glmmTMB)
library(mgcv)
library(gratia)
library(speedyseq)
library(ggeffects)
library(MuMIn)
library(effects)
library(broom)
library(cowplot)
library("coxme")
library(survival)
library(SpiecEasi)
library(NetCoMi)
memory.limit(1000000)
## [1] 1e+06
This chunk took 0.32 minutes
meerkat_final<-readRDS("C:\\Users\\risel\\Dropbox\\Sommer postdoc\\Meerkat project\\PROJECTS\\MeerkatSpikeInData\\Analysis updated\\6. TB AND MB POPULATION LEVEL\\Published data and code\\processed_data.RDS")
names(sample_data(meerkat_final))
## [1] "feature.id" "IndividID" "SampleDate"
## [4] "SampleTime" "Storage" "Seq_run"
## [7] "Imtechella" "Allobacillus" "Seq_depth"
## [10] "scaling_factor" "Seq_depth_nospikein" "BirthDate"
## [13] "DeathDate" "AgeAtSampling" "GroupAtSampling"
## [16] "Condition_weight" "SocialStatus" "Temp_max"
## [19] "sum_rainfall_month" "HoursAfterSunrise" "TimeCat"
## [22] "Year" "month" "Season"
## [25] "TB_status" "TB_exposure" "TB_resistance"
## [28] "TB_survival" "TB_symptom_date" "TB_exposure_date"
## [31] "TB_stage"
asv_table<-data.frame(meerkat_final@otu_table@.Data)
asv_table[1:5,1:5]
names(asv_table)<-sample_data(meerkat_final)$feature.id
meerkat_normalized<-sweep(asv_table, MARGIN=2, sample_data(meerkat_final)$scaling_factor, `*`)
meerkat_normalized<-round(meerkat_normalized, digits = 0)
meerkat_normalized<-otu_table(meerkat_normalized, taxa_are_rows = TRUE)
meerkat_scaled<-meerkat_final
otu_table(meerkat_scaled)<-meerkat_normalized
### add total abundance of reads to dataframe
sample_data(meerkat_scaled)$TotalAbundance<-sample_sums(meerkat_scaled)
meerkat_scaled
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 26122 taxa and 1142 samples ]:
## sample_data() Sample Data: [ 1142 samples by 32 sample variables ]:
## tax_table() Taxonomy Table: [ 26122 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 26122 tips and 26121 internal nodes ]:
## taxa are rows
#Filter samples
meerkat_filtered<-subset_samples(meerkat_scaled, Seq_depth_nospikein >3000)
meerkat_filtered<-subset_samples(meerkat_filtered, sample_sums(meerkat_filtered)>1000)
meerkat_filtered
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 26122 taxa and 1141 samples ]:
## sample_data() Sample Data: [ 1141 samples by 32 sample variables ]:
## tax_table() Taxonomy Table: [ 26122 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 26122 tips and 26121 internal nodes ]:
## taxa are rows
This chunk took 0.35 minutes
ggplot(data=sample_data(meerkat_filtered),aes(x=SampleDate,y=reorder(IndividID, SampleDate),group=IndividID))+
geom_line(size=0.1,alpha=0.5, col = "darkgrey")+
geom_point(size=2, aes(fill = TB_stage), pch = 21)+
geom_point(data = subset(sample_data(meerkat_filtered), TB_stage == "Diseased"),size=2, fill = "brown2", pch = 21)+
scale_fill_manual(values = c("forestgreen", "skyblue", "brown2"))+
theme_bw(base_size = 14)+
theme(strip.background = element_blank(), strip.text = element_blank())+
theme(panel.grid.minor=element_blank(),panel.grid.major=element_blank(),
panel.background=element_blank())+
theme(axis.text=element_blank(),axis.title.x = element_blank(),
axis.ticks.y=element_blank())+
scale_x_date(date_breaks = "5 year")+
ylab("Meerkat ID")+
geom_vline(xintercept = as.Date("2000-01-01"), linetype = "dashed", col = "grey")+
geom_vline(xintercept = as.Date("2005-01-01"), linetype = "dashed", col = "grey")+
geom_vline(xintercept = as.Date("2010-01-01"), linetype = "dashed", col = "grey")+
geom_vline(xintercept = as.Date("2015-01-01"), linetype = "dashed", col = "grey")+
geom_vline(xintercept = as.Date("2020-01-01"), linetype = "dashed", col = "grey")+
coord_cartesian(xlim = c(as.Date("1997-01-01"), as.Date("2020-01-01")))+
theme(plot.margin=unit(c(0.2,0.3,0,0.2),"cm"))+
guides(fill = guide_legend(override.aes = list(size = 3)))
This chunk took 0.04 minutes
daily_temp<-read.csv("C:/Users/risel/Dropbox/Sommer postdoc/Meerkat project/PROJECTS/MEERKAT DATA/Weather data/SA Weather service data/formatted_daily_temp_VanZylsrus_1997_2019.csv")[-1]
daily_rainfall<-read.csv("C:/Users/risel/Dropbox/Sommer postdoc/Meerkat project/PROJECTS/MEERKAT DATA/Weather data/SA Weather service data/formatted_daily_rainfall_VanZylsrus_1997_2019.csv")[-1]
daily_rainfall$Date<-as.Date(daily_rainfall$Date, "%Y-%m-%d")
daily_temp$Date<-as.Date(daily_temp$Date, "%Y-%m-%d")
daily_temp<-na.omit(daily_temp)
daily_rainfall$Year<-lubridate::year(daily_rainfall$Date)
daily_temp$Year<-lubridate::year(daily_temp$Date)
# rainfall
yearly_rainfall_df<-daily_rainfall %>% group_by(Year) %>% summarize(Measurement = mean(Rainfall.mm.), se = sd(Rainfall.mm.)/sqrt(n()))
yearly_rainfall_df<-subset(yearly_rainfall_df, Year != 2020)
yearly_rainfall_df<-subset(yearly_rainfall_df, Year != 1996)
## temp ######
## temp ######
## temp ######
## temp ######
yearly_temp_df<-daily_temp %>% group_by(Year, Measure) %>% summarize(Measurement = mean(Temperature), se = sd(Temperature)/sqrt(n()))
yearly_temp_df<-subset(yearly_temp_df, Year != 2020)
yearly_temp_df<-subset(yearly_temp_df, Year != 1996)
yearly_temp_df<-subset(yearly_temp_df, Measure == "mx")
##### merge
yearly_rainfall_df$Climate <- "Rainfall"
yearly_temp_df$Climate <- "Max temp"
yearly_temp_df<-yearly_temp_df[,-2]
climate_df_Year<-rbind(yearly_rainfall_df, yearly_temp_df)
This chunk took 0 minutes
TB_by_year <- read.csv("C:/Users/risel/Dropbox/Sommer postdoc/Meerkat project/PROJECTS/MEERKAT DATA/TB data/TB_by_year_ID_and_groups.csv")
TB_summary_year<-TB_by_year %>% group_by(year, current_TB_status_2) %>% summarize(freq = n())
TB_summary_year2<-TB_by_year %>% group_by(year) %>% summarize(freq = n())
TB_summary_year <-subset(TB_summary_year, year > 1996)
TB_summary_year <-subset(TB_summary_year, year <2020)
TB_summary_year$Total<- vlookup(TB_summary_year$year, TB_summary_year2, lookup_column = "year", result_column = "freq")
TB_summary_year$Proportion<- TB_summary_year$freq/ TB_summary_year$Total
TB_summary_year$se <- sqrt(TB_summary_year$Proportion∗(1−TB_summary_year$Proportion)/TB_summary_year$Total)
names(TB_summary_year)[2]<-"TB_status"
TB_summary_year$TB_status<-factor(TB_summary_year$TB_status, levels = c("unexposed", "exposed", "TB_signs"))
levels(TB_summary_year$TB_status)<-c("TB unexposed", "TB exposed", "TB diseased")
names(TB_summary_year)[1]<-"Year"
TB_summary_year<-data.frame(TB_summary_year)
plot_TB2<-ggplot(subset(TB_summary_year, TB_status != "TB unexposed"), aes(x = Year, y = Proportion, group = TB_status, fill = TB_status, col =TB_status))+
theme_bw(base_size = 14)+
geom_path(size = 1)+
geom_errorbar(aes(min = Proportion -se, max = Proportion +se), width = 0, size = 1)+
geom_point(size = 3, pch = 21, col = "black")+
scale_fill_manual(values = c("goldenrod1", "red"))+
scale_color_manual(values = c("goldenrod1", "red"))+
ylab("Proportion population")+
labs(fill = "TB status", col = "TB status")+
xlab("Year")+
theme(legend.position = c(0.25, 0.90),
legend.title = element_blank(),
legend.key.height = unit(0.5, 'cm'),
legend.key.width = unit(0.5, 'cm'),
legend.text =element_text(size=14))+
scale_x_continuous(breaks = c(1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,2016, 2017, 2018, 2019), expand = c(0.01,0.01))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))+
theme( legend.background = element_blank(),
legend.box.background =element_blank())
yearly_temp_df<-data.frame(yearly_temp_df)
yearly_temp_df$Year<-as.integer(yearly_temp_df$Year)
# temp
plot_temp2<- ggplot(yearly_temp_df, aes(x = Year, y = Measurement))+
theme_bw(base_size = 14)+
geom_smooth(method = "lm", col = "grey")+
xlab("Year")+
geom_path(size = 1, col = "grey")+
geom_errorbar(aes(min = Measurement - se, max = Measurement +se), width = 0)+
geom_point(size = 3, pch = 21, col = "black", fill = "darkgrey")+
ylab("Mean max. temp.")+
scale_x_continuous(breaks = c(1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,2016, 2017, 2018, 2019), expand = c(0,0))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))
# rainfall
plot_rainfall<- ggplot(yearly_rainfall_df, aes(x = Year, y = Measurement))+
theme_bw(base_size = 14)+
geom_smooth(method = "lm", col = "grey", alpha = 0.2)+
xlab("Year")+
geom_path(size = 1, col = "grey")+
geom_errorbar(aes(min = Measurement - se, max = Measurement +se), width = 0)+
geom_point(size = 2, pch = 21, col = "black", fill = "darkgrey")+
ylab("Mean rainfall")+
scale_x_continuous(breaks = c(1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,2016, 2017, 2018, 2019), expand = c(0,0))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))+
coord_cartesian(ylim = c(0,1.5))
# Figure 1
ggarrange(plot_temp2, plot_rainfall, plot_TB2, nrow = 1, ncol = 3, labels = c("a)","b)", "c)"))
#ggsave("Figures/Fig1.pdf")
This chunk took 0.02 minutes
meerkat_class<-tax_glom(meerkat_filtered, taxrank = "Class")
sample_data(meerkat_class)<-sample_data(meerkat_class)[,c("feature.id")]
meerkat_class<-microbiome::transform(meerkat_class, "compositional")
class_per_sample<-psmelt(meerkat_class)
top_class<-c("Bacteroidia","Fusobacteriia" , "Gammaproteobacteria", "Clostridia",
"Coriobacteriia","Actinobacteria", "Bacilli")
class_per_sample$Class_plot<-as.character(ifelse(class_per_sample$Class %in% top_class, class_per_sample$Class, "Other"))
class_per_sample %>% group_by(Class) %>% summarize(mean = mean(Abundance)) %>% arrange(-mean)
class_per_sample$Class_plot<-factor(class_per_sample$Class_plot, levels = c("Bacteroidia","Fusobacteriia" , "Other","Gammaproteobacteria", "Clostridia",
"Coriobacteriia","Actinobacteria", "Bacilli"))
metadata<-data.frame(sample_data(meerkat_filtered))
names(metadata)
## [1] "feature.id" "IndividID" "SampleDate"
## [4] "SampleTime" "Storage" "Seq_run"
## [7] "Imtechella" "Allobacillus" "Seq_depth"
## [10] "scaling_factor" "Seq_depth_nospikein" "BirthDate"
## [13] "DeathDate" "AgeAtSampling" "GroupAtSampling"
## [16] "Condition_weight" "SocialStatus" "Temp_max"
## [19] "sum_rainfall_month" "HoursAfterSunrise" "TimeCat"
## [22] "Year" "month" "Season"
## [25] "TB_status" "TB_exposure" "TB_resistance"
## [28] "TB_survival" "TB_symptom_date" "TB_exposure_date"
## [31] "TB_stage" "TotalAbundance"
metadata<-metadata[,c("feature.id", "SampleDate", "Year", "Storage", "HoursAfterSunrise", "TotalAbundance", "TB_stage", "Season", "Temp_max", "sum_rainfall_month", "sum_rainfall_month", "Condition_weight", "GroupAtSampling")]
class_per_sample<- merge(class_per_sample, metadata, by = "feature.id")
## microbiome plot
pal<-brewer.pal(10,"Paired")
pal<-c(pal, "lightgrey")
pal<-pal[c(2,1,11,4,3,6,9,10)]
pal<-rev(pal)
plot_per_sample<-ggplot(class_per_sample, aes(y = Abundance, x = reorder(Sample, SampleDate), fill = Class_plot))+
geom_col(position = "fill", stat="identity", size = 0)+
scale_fill_manual(values = rev(pal))+
ylab("Relative abundance")+
theme(axis.text.x = element_blank(), axis.ticks = element_blank(), axis.title.x = element_blank())+
scale_y_continuous(expand = c(0,0)) +
labs(fill = "Bacterial class")+
theme(plot.margin=unit(c(0.2,0.2,0.2,1),"cm"))+
theme(legend.justification = c("left", "center"))
ggplot(class_per_sample, aes(y = Abundance, x = reorder(Sample, SampleDate), fill = Class_plot))+
facet_grid(~Year, scales = "free")+
geom_col(position = "fill", stat="identity", size = 0)+
scale_fill_manual(values = rev(pal))+
ylab("Relative abundance")+
theme(axis.text.x = element_blank(), axis.ticks = element_blank(), axis.title.x = element_blank())+
scale_y_continuous(expand = c(0,0)) +
labs(fill = "Bacterial class")+
theme(plot.margin=unit(c(0.2,0.2,0.2,1),"cm"))+
theme(legend.justification = c("left", "center"))
## TB stage
levels(class_per_sample$TB_stage)
## [1] "Unexposed" "Exposed" "Diseased"
levels(class_per_sample$TB_stage)<-c("TB unexposed", "TB exposed", "TB diseased")
#levels(class_per_sample$TB_stage_weight)<-c("TB unexposed", "TB exposed", "TB diseased")
table(class_per_sample$TB_stage)
##
## TB unexposed TB exposed TB diseased
## 43363 64890 9270
plot_TB_sample<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = TB_stage))+
geom_tile()+
theme_void()+
scale_fill_manual(values = c( "forestgreen", "skyblue","brown2"))+
theme(axis.title.y = element_text(angle = 0))+
ylab("TB")+
theme(legend.position="right", legend.direction="vertical",
legend.justification = c("left", "top"),
legend.key.height = unit(0.3, 'cm'),
legend.title=element_blank())
# temp max
pal_dichromat<-dichromat::colorschemes$BluetoOrange.12
pal_dichromat[13]<-"black"
pal_dichromat[14]<-"black"
plot_temp1<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = Temp_max))+geom_tile()+
theme_void()+
scale_fill_gradientn(colours = pal_dichromat)+
theme(legend.position="right", legend.direction="vertical",
legend.justification = c("left", "top"),
legend.key.height = unit(0.15, 'cm'),
legend.title=element_blank()) +
theme(axis.title.y = element_text(angle = 0))+
ylab(" Max \ntemp")
## season
plot_season<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = Season))+
geom_tile()+
theme_void()+
scale_fill_manual(values = c("gold", "forestgreen"))+
theme(legend.position="right", legend.direction="vertical",
legend.justification = c("left", "top"),
legend.key.height = unit(0.1, 'cm'),
legend.title=element_blank())+
theme(axis.title.y = element_text(angle = 0))+
ylab("Season")
# year
year_names <- c(
`1997` = "",
`1998` = "",
`1999` = "1999",
`2000` = "",
`2001` = "2001",
`2002` = "2002",
`2003` = "2003",
`2004` = "2004",
`2005` = "2005",
`2006` = "2006",
`2007` = "2007",
`2008` = "2008",
`2009` = "2009",
`2010` = "2010",
`2011` = "2011",
`2012` = "2012",
`2013` = "2013",
`2014` = "2014",
`2015` = "2015",
`2016` = "2016",
`2017` = "2017",
`2018` = "2018",
`2019` = "")
# 13 x 6
# ####methods
# ####methods
# ####methods
# ####methods
drought_years<- c(1997, 1998, 2000, 2006, 2013, 2014, 2015, 2017, 2018)
class_per_sample$Drought<-ifelse(class_per_sample$Year %in% drought_years, "Drought", "Normal")
drought_cols<- c("red", "red", "skyblue", "red", "skyblue", # 1997 - 2001
"skyblue","skyblue","skyblue","skyblue", "red", # 2002 - 2006
"skyblue","skyblue","skyblue","skyblue","skyblue", # 2007 - 2011
"red", "skyblue", "red", "red", "skyblue", # 2012 - 2016
"red", "red" , "skyblue" ) # 2017 - 2019
plot_year<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = Drought))+
geom_tile()+
theme_void()+
# scale_fill_viridis(discrete = F)+
scale_fill_manual(values = c("red", "skyblue"))+
facet_grid(~Year, scales = "free", space = "free_x",switch = "x", labeller = as_labeller(year_names))+
theme(panel.spacing = unit(0.07, "lines"),
strip.background = element_blank(),
strip.placement = "outside",
strip.text.x = element_text(angle = 90, hjust = 0.5)) +
xlab("")+
theme(axis.title.y = element_text(angle = 0, hjust = ))+
ylab("Year ")+
theme(axis.title.x = element_text(colour = "black"))+
theme(plot.margin=unit(c(0,2.8,0.2,1.55),"cm")) +
theme(legend.position="right", legend.direction="vertical",
legend.justification = c("left", "top"),
legend.key.height = unit(0.2, 'cm'),
legend.title=element_blank())
# rainfall
plot_rainfall<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = log10(sum_rainfall_month+1)))+geom_tile()+
theme_void()+
scale_fill_gradientn(colours = pal_dichromat[1:12])+
theme(legend.position="right", legend.direction="vertical",
legend.justification = c("left", "top"),
legend.key.height = unit(0.2, 'cm'),
legend.title=element_blank())+
theme(axis.title.y = element_text(angle = 0))+
ylab("Rainfall")
## body condition
#scales::show_col(pal_dichromat_cond)
pal_dichromat_cond<-c("black", "black", "black", pal_dichromat[1:12])
pal_dichromat_cond[8]<-"white"
pal_dichromat_cond[9]<-"white"
pal_dichromat_cond[10]<-"white"
pal_dichromat_cond[16]<-"red"
plot_condition<-ggplot(class_per_sample, aes(y = 1, x = reorder(Sample, SampleDate), fill = Condition_weight))+
geom_tile()+
theme_void()+
scale_fill_gradientn(colours = pal_dichromat_cond)+
theme(legend.position="right", legend.direction="vertical",
legend.justification = c("left", "top"),
legend.key.height = unit(0.2, 'cm'),
legend.title=element_blank())+
theme(axis.title.y = element_text(angle = 0))+
ylab("Condition")
## without storage
sup_figabc<-ggarrange(plot_per_sample, plot_TB_sample, plot_condition, plot_temp1, plot_rainfall, plot_season, ncol = 1, align = "v", heights = c(8,1,1, 1, 1,1, 1, 1.5), labels= c("a)", "b)", "c)", "d)", "e)", "f)"))
ggarrange(sup_figabc, plot_year, ncol = 1, heights = c(7,1.1), labels = c(NA, "g)"))
# fig 2
#ggsave("Figures/Fig2.pdf")
# Saving 16.4 x 7.97 in image
This chunk took 0.41 minutes
meerkat_merged_ID<-merge_samples(meerkat_filtered,"IndividID", fun=mean)
sample_data(meerkat_merged_ID)$IndividID<-sample_names(meerkat_merged_ID)
meerkat_merged_ID_class<-tax_glom(meerkat_merged_ID, taxrank = "Class")
sample_data(meerkat_merged_ID_class)<-sample_data(meerkat_merged_ID_class)[,"IndividID"]
meerkat_merged_ID_class<-microbiome::transform(meerkat_merged_ID_class, "compositional")
class_per_ID<-psmelt(meerkat_merged_ID_class)
class_abundance<-class_per_ID %>% group_by(Class) %>% summarise(mean = mean(Abundance))
class_abundance<-class_abundance[order(-class_abundance$mean),]
top_class<-class_abundance$Class[1:7]
class_per_ID$Class_plot<-as.character(ifelse(class_per_ID$Class %in% top_class, class_per_ID$Class, "Other"))
class_per_ID$Class_plot<-factor(class_per_ID$Class_plot, levels = c("Bacteroidia","Fusobacteriia" , "Other","Gammaproteobacteria", "Clostridia",
"Coriobacteriia","Actinobacteria", "Bacilli"))
## order by birthyear #####
## order by birthyear #####
## order by birthyear #####
## order by birthyear #####
metadata<-data.frame(sample_data(meerkat_filtered))
metadata$BirthYear<-lubridate::year(sample_data(meerkat_filtered)$BirthDate)
metadata<-metadata[,c("feature.id", "IndividID", "TB_status", "BirthYear","BirthDate")]
head(metadata)
metadata2<-distinct(metadata, IndividID, .keep_all = T)
metadata2<-metadata2 %>% arrange(BirthDate)
head(metadata2)
ID_order<-metadata2$IndividID
class_per_ID<-merge(class_per_ID, metadata2, by.y = "IndividID", by.x = "Sample")
class_per_ID$TB_status<-factor(class_per_ID$TB_status, levels = c("unexposed", "exposed_asymptomatic", "exposed_symptomatic"))
levels(class_per_ID$TB_status)<-c("TB unexposed", "TB exposed", "TB diseased")
metadata2$IndividID<-factor(metadata2$IndividID, levels =ID_order)
class_per_ID$IndividID<-factor(class_per_ID$IndividID, levels =ID_order)
#class_per_ID<-class_per_ID%>% group_by(IndividID, Class_plot)%>%summarize(Abundance = sum(Abundance))
class_per_ID<-data.frame(class_per_ID)
#########################
#########################
#########################
#########################
microbiome_barplot <- ggplot(class_per_ID, aes(x = IndividID, y = Abundance, fill = Class_plot, col = Class_plot))+
theme_gray(base_size = 14)+
geom_bar( stat="identity", width = 0.5)+
scale_fill_manual(values = rev(pal))+
scale_color_manual(values = rev(pal), guide = "none")+
theme(legend.position = "right")+
theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x = element_blank())+
theme(plot.margin=unit(c(0.4,0,0,0),"cm"))+
scale_y_continuous(expand = c(0,0)) +
labs(fill = "Bacterial class")+
theme(legend.justification = c("left", "center"))
######## TB
p2<-ggplot(metadata2, aes(x = IndividID, y = 1, fill = TB_status))+
geom_tile(col = "white")+
theme_void(base_size = 14)+
scale_fill_manual(values = c("forestgreen", "cyan3", "brown2"))+
theme(legend.position = "right", legend.justification = "left") +
theme(axis.text.x = element_blank(), axis.title.x = element_blank(), axis.ticks.x = element_blank())+
theme(axis.text.y = element_blank())+
theme(axis.title.y = element_text(angle = 90, vjust=0.5))+
ylab("TB \nstatus")+
theme(plot.margin=unit(c(0,0,0,0.1),"cm"))+
labs(fill = "TB status")
######## birth year
p3<-ggplot(metadata2, aes(y = 1, x = IndividID, fill = BirthYear))+
geom_tile()+
theme_void(base_size = 14)+
scale_fill_viridis(discrete = F)+
facet_grid(~BirthYear, scales = "free", space = "free_x",switch = "x")+
theme(panel.spacing = unit(0.05, "lines"),
strip.background = element_blank(),
strip.placement = "outside",
strip.text.x = element_text(angle = 90, size = , hjust = 0.5)) +
xlab("")+
theme(axis.title.y = element_text(angle = 90, vjust=9))+
ylab("Birth \nyear")+
theme(axis.title.x = element_text(colour = "black"))+
theme(plot.margin=unit(c(0, 5.7, 0.2,1.1),"cm"))+
theme(legend.position="none")
figsxab<-ggarrange(microbiome_barplot,p2, ncol = 1, align = "v", heights = c(2,0.5))
ggarrange(figsxab, p3, ncol = 1, heights = c(3,0.8))
#ggsave("Figures/FigS6.pdf")
# Saving 16.9 x 7.97 in image
This chunk took 0.1 minutes
bacteroidia_2005<-subset(class_per_ID, Class_plot == "Bacteroidia" & BirthYear <2006 )
bacteroidia_2014<-subset(class_per_ID, Class_plot == "Bacteroidia"& BirthYear >2013 )
length(unique(bacteroidia_2005$IndividID))
## [1] 84
length(unique(bacteroidia_2014$IndividID))
## [1] 83
summary(bacteroidia_2005$Abundance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000000 0.004754 0.017053 0.037109 0.048031 0.255336
sd(bacteroidia_2005$Abundance)
## [1] 0.04815908
summary(bacteroidia_2014$Abundance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000689 0.048280 0.099646 0.100043 0.141796 0.327672
sd(bacteroidia_2014$Abundance)
## [1] 0.06791717
bacilli_2005<-subset(class_per_ID, Class_plot == "Bacilli" & BirthYear <2006 )
bacilli_2014<-subset(class_per_ID, Class_plot == "Bacilli"& BirthYear >2014 )
summary(bacilli_2005$Abundance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.006604 0.062771 0.095245 0.131469 0.163677 0.765236
sd(bacilli_2005$Abundance)
## [1] 0.1176245
summary(bacilli_2014$Abundance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.009551 0.033834 0.052679 0.074724 0.103429 0.326442
sd(bacilli_2014$Abundance)
## [1] 0.06493677
This chunk took 0 minutes
storage_ps_class<-tax_glom(meerkat_filtered, taxrank = "Class")
storage_ps_class<-core(storage_ps_class, detection = 10, prevalence = 0)
sample_data(storage_ps_class)<-sample_data(storage_ps_class)[,c("feature.id", "Storage", "IndividID", "Year", "TimeCat")]
class_per_storage<-psmelt(storage_ps_class)
class_per_storage$Class_plot<-as.character(ifelse(class_per_storage$Class %in% top_class, class_per_storage$Class, "Other"))
class_per_storage$Class_plot<-factor(class_per_storage$Class_plot, levels = c("Bacteroidia","Fusobacteriia" , "Other","Gammaproteobacteria", "Clostridia",
"Coriobacteriia","Actinobacteria", "Bacilli"))
class_per_storage$Storage<-factor(class_per_storage$Storage, levels = c("FROZEN", "FREEZEDRIED"))
################
class_per_storage$Storage<-factor(class_per_storage$Storage, levels = c("FROZEN", "FREEZEDRIED"))
ggplot(class_per_storage, aes(x =Year, y = Abundance, fill = Class_plot))+
geom_col(position = "fill", stat="identity", size = 0)+
ylab("Relative abundance")+
facet_wrap(~Storage, ncol = 1, strip.position="right")+
scale_fill_manual(values = rev(pal))+
xlim(c(1996, 2020))+
# theme(plot.margin=unit(c(0.2,0,0,0.1),"cm"))+
labs(fill = "Bacterial class")+
ggtitle("Yearly composition by storage method")
This chunk took 0.06 minutes
# time time of day
class_per_storage$TimeCat <- factor(class_per_storage$TimeCat, levels = c( "MORNING", "AFTERNOON"))
ggplot(class_per_storage, aes(x =Year, y = Abundance, fill = Class_plot))+
geom_col(position = "fill", stat="identity", size = 0)+
ylab("Relative abundance")+
facet_wrap(~TimeCat, ncol = 2)+
scale_fill_manual(values = rev(pal))+
xlim(c(1996, 2020))+
# theme(plot.margin=unit(c(0.2,0,0,0.1),"cm"))+
labs(fill = "Bacterial class")+
ggtitle("Yearly composition by time of day")
This chunk took 0.04 minutes
## season
season_ps_class<-tax_glom(meerkat_filtered, taxrank = "Class")
season_ps_class<-core(season_ps_class, detection = 10, prevalence = 0)
sample_data(season_ps_class)<-sample_data(season_ps_class)[,c("feature.id", "Season", "IndividID", "Year")]
class_per_season<-psmelt(season_ps_class)
class_per_season$Class_plot<-as.character(ifelse(class_per_season$Class %in% top_class, class_per_season$Class, "Other"))
class_per_season$Class_plot<-factor(class_per_season$Class_plot, levels = c("Bacteroidia","Fusobacteriia" , "Other","Gammaproteobacteria", "Clostridia",
"Coriobacteriia","Actinobacteria", "Bacilli"))
################
ggplot(class_per_season, aes(x =Year, y = Abundance, fill = Class_plot))+
geom_col(position = "fill", stat="identity", size = 0)+
ylab("Relative abundance")+
facet_wrap(~Season, ncol = 1, strip.position="right")+
scale_fill_manual(values = rev(pal))+
xlim(c(1996, 2020))+
# theme(plot.margin=unit(c(0.2,0,0,0.1),"cm"))+
labs(fill = "Bacterial class")+
ggtitle("Yearly composition by season")
This chunk took 0.07 minutes
metadata_groups<-data.frame(sample_data(meerkat_filtered))
group_freq<-data.frame(table(metadata_groups$GroupAtSampling))
group_freq<-group_freq %>% arrange(-Freq)
head(group_freq, 10)
top_groups<-group_freq$Var1[1:15]
group_ps_class<-tax_glom(meerkat_filtered, taxrank = "Class")
group_ps_class<-core(group_ps_class, detection = 10, prevalence = 0)
sample_data(group_ps_class)<-sample_data(group_ps_class)[,c("feature.id", "Storage", "IndividID", "Year", "SampleDate", "GroupAtSampling")]
class_per_storage<-psmelt(group_ps_class)
class_per_storage$Class_plot<-as.character(ifelse(class_per_storage$Class %in% top_class, class_per_storage$Class, "Other"))
class_per_storage$Class_plot<-factor(class_per_storage$Class_plot, levels = c("Bacteroidia","Fusobacteriia" , "Other","Gammaproteobacteria", "Clostridia",
"Coriobacteriia","Actinobacteria", "Bacilli"))
class_per_storage$Group_plot<-ifelse(class_per_storage$GroupAtSampling %in% top_groups, as.character(class_per_storage$GroupAtSampling), "Other")
class_per_storage$Group_plot<-forcats::fct_reorder(class_per_storage$Group_plot, class_per_storage$Year, mean)
length(unique(subset(class_per_storage, Group_plot != "Other")$feature.id))
## [1] 1003
ggplot(subset(class_per_storage, Group_plot != "Other"), aes(x =Year, y = Abundance, fill = Class_plot))+
geom_col(position = "fill", stat="identity", size = 0)+
ylab("Realtive abundance")+
scale_fill_manual(values = rev(pal))+
facet_wrap(~Group_plot, ncol = 1, strip.position="right")+
ggtitle("Yearly composition by major social groups")+
scale_y_continuous(breaks = c(0.2, 0.4, 0.6, 0.8))+
labs(fill = "Bacterial class")
This chunk took 0.06 minutes
meerkat_merged_year<-merge_samples(meerkat_filtered,"Year", fun=mean)
sample_data(meerkat_merged_year)$Year<-sample_names(meerkat_merged_year)
meerkat_merged_year_class<-tax_glom(meerkat_merged_year, taxrank = "Class")
sample_data(meerkat_merged_year_class)<-sample_data(meerkat_merged_year_class)[,1]
#meerkat_merged_year_class<-microbiome::transform(meerkat_merged_year_class, "compositional")
class_per_year<-psmelt(meerkat_merged_year_class)
class_per_year$Class_plot<-as.character(ifelse(class_per_year$Class %in% top_class, class_per_year$Class, "Other"))
class_per_year$Class_plot<-factor(class_per_year$Class_plot, levels = c("Bacteroidia","Fusobacteriia" , "Other","Gammaproteobacteria", "Clostridia",
"Coriobacteriia","Actinobacteria", "Bacilli"))
class_per_year$Sample<- as.integer(class_per_year$Sample)
class_per_year<-class_per_year%>% group_by(Sample, Class_plot)%>%summarize(Abundance = sum(Abundance))
class_per_year<-data.frame(class_per_year)
class_per_year$Sample<-factor(class_per_year$Sample)
plot_mb_year<-ggplot(class_per_year, aes(x =Sample, y = Abundance, fill = Class_plot))+
geom_bar( stat="identity", size = 0, position = "fill")+
theme_bw()+
scale_fill_manual(values = rev(pal))+
theme(legend.position = "right")+
xlab("")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.9, hjust=1))+
labs(fill = "Bacterial class")+
scale_y_continuous(expand = c(0,0))+
xlab("Year")
year_freq<-data.frame(table(sample_data(meerkat_filtered)$Year))
year_hist<-ggplot(year_freq, aes(x = Var1, y = Freq)) + geom_col(fill = "lightgray", col = "black")+
# theme(plot.margin=unit(c(0.2,5,0.2,0.2),"cm"))+
theme(axis.text.x = element_blank(), axis.title.x = element_blank())+
theme(legend.position = "none")+
scale_y_sqrt(breaks = c(0,20,50,100,200))+
geom_hline(yintercept = 20, col = "black", linetype = "longdash")
year_plot<-ggarrange(year_hist, plot_mb_year, ncol = 1, align = "v", heights = c(1,7), legend = "none")+
theme(plot.margin=unit(c(0.2,0.2,0.2,0.6),"cm"))
## tb
TB_summary_year2<-subset(TB_summary_year, TB_status == "TB diseased")
extra_rows<-data.frame(as.integer(1997), as.factor("TB diseased") , as.integer(0) , as.integer(165) ,as.numeric(0) ,as.numeric(0))
names(extra_rows)<-names(TB_summary_year2)
extra_rows2<-data.frame(as.integer(1998), as.factor("TB diseased") , as.integer(0) , as.integer(165) ,as.numeric(0) ,as.numeric(0))
names(extra_rows2)<-names(TB_summary_year2)
TB_summary_year3<-rbind(TB_summary_year2, extra_rows, extra_rows2)
TB_summary_year3$Year<-factor(TB_summary_year3$Year)
summary(TB_summary_year3)
## Year TB_status freq Total
## 1997 : 1 TB unexposed: 0 Min. : 0.00 Min. :165.0
## 1998 : 1 TB exposed : 0 1st Qu.: 7.50 1st Qu.:284.5
## 1999 : 1 TB diseased :23 Median :37.00 Median :356.0
## 2000 : 1 Mean :35.48 Mean :341.3
## 2001 : 1 3rd Qu.:59.00 3rd Qu.:416.5
## 2002 : 1 Max. :81.00 Max. :452.0
## (Other):17
## Proportion se
## Min. :0.00000 Min. :0.000000
## 1st Qu.:0.02382 1st Qu.:0.008278
## Median :0.09160 Median :0.014551
## Mean :0.10353 Mean :0.013787
## 3rd Qu.:0.15807 3rd Qu.:0.018940
## Max. :0.29412 Max. :0.029535
##
TB_hist<-ggplot(TB_summary_year3, aes(y = Proportion, x = Year))+
geom_col(fill = "brown2", col = "black")+
# theme(plot.margin=unit(c(0.2,5,0.2,0.2),"cm"))+
theme(axis.text.x = element_blank(), axis.title.x = element_blank())+
theme(legend.position = "none")
year_plot<-ggarrange(year_hist, TB_hist, plot_mb_year, ncol = 1, align = "v", heights = c(1,1, 5), legend = "none", labels = c("b)", "c)", "d)"), label.x = -0.07)+
theme(plot.margin=unit(c(0.2,0.2,0.2,1),"cm"))
year_plot
This chunk took 0.03 minutes
scaled_core<-microbiome::core(meerkat_filtered, detection = 100, prevalence = 0.01)
scaled_core<-tax_glom(meerkat_filtered, taxrank = "Genus")
scaled_core<-microbiome::core(scaled_core, detection = 10, prevalence = 0.005)
scaled_core
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 499 taxa and 1141 samples ]:
## sample_data() Sample Data: [ 1141 samples by 32 sample variables ]:
## tax_table() Taxonomy Table: [ 499 taxa by 7 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 499 tips and 498 internal nodes ]:
## taxa are rows
This chunk took 0.05 minutes
# otutable
otutable<-data.frame(t(scaled_core@otu_table@.Data))
dim(otutable)
## [1] 1141 499
otutable[1:5,1:5]
colnames(otutable)<-taxa_names(scaled_core)
metadata<-data.frame(sample_data(scaled_core))
names(metadata)
## [1] "feature.id" "IndividID" "SampleDate"
## [4] "SampleTime" "Storage" "Seq_run"
## [7] "Imtechella" "Allobacillus" "Seq_depth"
## [10] "scaling_factor" "Seq_depth_nospikein" "BirthDate"
## [13] "DeathDate" "AgeAtSampling" "GroupAtSampling"
## [16] "Condition_weight" "SocialStatus" "Temp_max"
## [19] "sum_rainfall_month" "HoursAfterSunrise" "TimeCat"
## [22] "Year" "month" "Season"
## [25] "TB_status" "TB_exposure" "TB_resistance"
## [28] "TB_survival" "TB_symptom_date" "TB_exposure_date"
## [31] "TB_stage" "TotalAbundance"
## TB variables
TB_stage<-metadata$TB_stage
TB_status<-metadata$TB_status
TB_survival<-metadata$TB_survival
# condition and season
summary(metadata$Condition_weight)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -274.47 -57.95 -15.40 -14.00 28.04 245.68
metadata$Condition_cat <- ifelse(metadata$Condition_weight < summary(metadata$Condition_weight)[2], "BAD", "AVERAGE")
metadata$Condition_cat <- ifelse(metadata$Condition_weight > summary(metadata$Condition_weight)[5], "GOOD", metadata$Condition_cat)
condition<-metadata$Condition_cat
season <- metadata$Season
## rainfall
summary(log10(metadata$sum_rainfall_month+1))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.8062 0.7770 1.3385 2.2896
metadata$Rainfall_cat <- ifelse(metadata$sum_rainfall_month == summary(metadata$sum_rainfall_month)[2], "LOW", "AVERAGE")
metadata$Rainfall_cat <- ifelse(metadata$sum_rainfall_month > summary(metadata$sum_rainfall_month)[5], "HIGH", metadata$Rainfall_cat)
table(metadata$Rainfall_cat)
##
## AVERAGE HIGH LOW
## 541 281 319
# temp
hist(metadata$Temp_max)
summary(metadata$Temp_max)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12.60 26.80 32.30 31.25 36.10 43.00
metadata$Max_temp_cat <- ifelse(metadata$Temp_max < summary(metadata$Temp_max)[2], "LOW", "AVERAGE")
metadata$Max_temp_cat <- ifelse(metadata$Temp_max > summary(metadata$Temp_max)[5], "HIGH", metadata$Max_temp_cat)
table(metadata$Max_temp_cat)
##
## AVERAGE HIGH LOW
## 577 283 281
## max temp
max_temp<-metadata$Max_temp_cat
#hist(max_temp)
# year
hist(metadata$Year)
metadata<-metadata %>% mutate(YearCat = case_when((Year <2005 ~ "Pre-2005"),
(Year >=2005 & Year < 2010) ~ "2005-2010",
(Year >=2010 & Year < 2015) ~ "2010-2015",
(Year >=2015) ~ "2015-2019"))
YearCat<-as.character(metadata$YearCat)
### control variables
IndividID<-metadata$IndividID
time<-metadata$TimeCat
maxtemp<-metadata$Max_temp_cat
samplingtemp<-metadata$HourlyTemp
foraging<-log10(metadata$HoursSinceForagingStart+0.1)
rainfall<-metadata$Rainfall_cat
# methods
storage<-metadata$Storage
seq_depth<-as.numeric(scale(metadata$Seq_depth_nospikein))
seq_run<-metadata$Seq_run
This chunk took 0 minutes
###### genus level
wunifrac_dist<-distance(scaled_core, method = "wunifrac")
unifrac_dist<-distance(scaled_core, method = "unifrac")
bray_dist<-distance(scaled_core, method = "bray")
# asv level ###
scaled_core_asv<-microbiome::core(meerkat_filtered, detection = 100, prevalence = 0.005)
wunifrac_dist_asv<-distance(scaled_core_asv, method = "wunifrac")
unifrac_dist_asv<-distance(scaled_core_asv, method = "unifrac")
bray_dist_asv<-distance(scaled_core_asv, method = "bray")
This chunk took 18.43 minutes
# weighted unifrac
final_model<-capscale(wunifrac_dist ~
time +
TB_stage+
condition+
rainfall+
YearCat+
storage+
seq_depth+
IndividID,
env = metadata, comm = otutable)
# weighted unifrac
final_model_anova<-anova.cca(final_model, by="terms")
final_model_anova
final_model_anova_df<-data.frame(final_model_anova)
RsquareAdj(final_model)
## $r.squared
## [1] 0.4318683
##
## $adj.r.squared
## [1] 0.2755368
# write.csv(final_model_anova_df, "dbrda_wunifrac_summary_genus.csv")
## unweighted unifrac
unifrac_model<-capscale(unifrac_dist ~
time +
TB_stage+
condition+
rainfall+
YearCat+
storage+
seq_depth+
IndividID,
env = metadata, comm = otutable)
## unweighted unifrac ##
unifrac_anova<-anova.cca(unifrac_model, by="terms")
unifrac_anova
unifrac_anova_df<-data.frame(unifrac_anova)
#write.csv(unifrac_anova_df, "dbrda_unifrac_summary_genus.csv")
bray_model<-capscale(bray_dist ~
time +
TB_stage+
condition+
rainfall+
YearCat+
storage+
seq_depth+
IndividID,
env = metadata, comm = otutable)
bray_anova<-anova.cca(bray_model, by="terms")
bray_anova
bray_anova_df<-data.frame(bray_anova)
#write.csv(bray_anova_df, "dbrda_bray_summary_genus.csv")
#### ASV level ########
#### ASV level ########
#### ASV level ########
#### ASV level ########
#### ASV level ########
# weighted unifrac
wunifrac_model_asv<-capscale(wunifrac_dist_asv ~
time +
TB_stage+
condition+
rainfall+
YearCat+
storage+
seq_depth+
IndividID,
env = metadata, comm = otutable)
# weighted unifrac
wunifrac_asv_anova<-anova.cca(wunifrac_model_asv, by="terms")
wunifrac_asv_anova
## unweighted unifrac
unifrac_model_asv<-capscale(unifrac_dist_asv ~
time +
TB_stage+
condition+
rainfall+
YearCat+
storage+
seq_depth+
IndividID,
env = metadata, comm = otutable)
## unweighted unifrac ##
unifrac_asv_anova<-anova.cca(unifrac_model_asv, by="terms")
unifrac_asv_anova
bray_model_asv<-capscale(bray_dist_asv ~
time +
TB_stage+
condition+
rainfall+
YearCat+
storage+
seq_depth+
IndividID,
env = metadata, comm = otutable)
bray_asv_anova<-anova.cca(bray_model_asv, by="terms")
bray_asv_anova
This chunk took 35.02 minutes
##dominant taxa
dominant_phylo<-scaled_core
#dominant_phylo<-scaled_core_genus
sample_data(dominant_phylo)<-sample_data(dominant_phylo)[,"feature.id"]
# melt data
psmelt_df<-speedyseq::psmelt(dominant_phylo)
head(psmelt_df)
dominant_taxa_df<-psmelt_df %>%
group_by(feature.id) %>%
arrange(feature.id, -Abundance) %>%
dplyr::mutate(rank=rank(-Abundance))
head(dominant_taxa_df)
dominant_taxa_df<-subset(dominant_taxa_df, rank ==1)
dominant_taxa_df$Class<-factor(dominant_taxa_df$Class)
# add to metadata
sample_data(scaled_core)$DomClass <-vlookup(sample_data(scaled_core)$feature.id, dominant_taxa_df, lookup_column = "Sample", result_column = "Class")
top_class
## [1] "Clostridia" "Bacilli" "Bacteroidia"
## [4] "Coriobacteriia" "Actinobacteria" "Fusobacteriia"
## [7] "Gammaproteobacteria"
sample_data(scaled_core)$DomClass_plot<-ifelse(sample_data(scaled_core)$DomClass %in% top_class, as.character(sample_data(scaled_core)$DomClass), "Other")
unique(sample_data(scaled_core)$DomClass_plot)
## [1] "Clostridia" "Other" "Bacilli"
## [4] "Actinobacteria" "Gammaproteobacteria" "Coriobacteriia"
## [7] "Fusobacteriia" "Bacteroidia"
#sample_data(scaled_core)$DomClass_plot<-factor(sample_data(scaled_core)$DomClass_plot, levels =c(top_class, "Other"))
sample_data(scaled_core)$DomClass_plot<-factor(sample_data(scaled_core)$DomClass_plot, levels = c("Bacteroidia","Fusobacteriia" , "Other","Gammaproteobacteria", "Clostridia",
"Coriobacteriia","Actinobacteria", "Bacilli"))
pal<-brewer.pal(10,"Paired")
pal<-c(pal, "darkgrey")
pal<-pal[c(2,1,11,4,3,6,9,10)]
pal<-rev(pal)
This chunk took 0.01 minutes
######################################
## extract data from model
final_model_df<-scores(final_model)
str(final_model_df)
## List of 3
## $ species : num [1:499, 1:2] -2.34e-04 -1.57e-04 3.19e-06 1.82e-05 1.39e-04 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:499] "a29558bc215cfe6f913afafef02450b9" "de14e4fbd65f5ea24336f74fca93998d" "b1a40d7ca4ef789afb38c8de415893eb" "bd28d84ee5305bf27575469eb63ba948" ...
## .. ..$ : chr [1:2] "CAP1" "CAP2"
## $ sites : num [1:1141, 1:2] -0.658 -0.164 -0.795 1.257 0.517 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:1141] "Feature1" "Feature3" "Feature5" "Feature7" ...
## .. ..$ : chr [1:2] "CAP1" "CAP2"
## $ centroids: num [1:252, 1:2] 0.5301 -0.271 -0.0647 0.0524 -0.0639 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:252] "timeAFTERNOON" "timeMORNING" "TB_stageUnexposed" "TB_stageExposed" ...
## .. ..$ : chr [1:2] "CAP1" "CAP2"
## - attr(*, "const")= num 17.6
# extract CAP scores
vectors_df<-data.frame(final_model_df$sites)
vectors_df$feature.id<-row.names(vectors_df)
# merge with info on dominant family
sample_metadata<-data.frame(sample_data(scaled_core))[,c("feature.id", "DomClass_plot")]
vectors_df<-merge(vectors_df, sample_metadata, by = "feature.id")
#### add arrows #########
#### add arrows #########
#### add arrows #########
#### add arrows #########
centroids_df<-data.frame(final_model_df$centroids)
centroids_df<-centroids_df[c(1:5,7:8,10:15),]
#centroids_df<-centroids_df[c(1,2,4,6:10,12:15),]
centroids_df$Label<-c("Afternoon", "Morning", "TB unexposed", "TB exposed", "TB clinical signs", "Bad condition", "Good condition","High rainfall", "Low rainfall" , "2005-2010", "2010-2015", "2015-2019", "Pre-2005")
# scale so they are more plottable
centroids_df[1,1] <-centroids_df[1,1] *0.7
centroids_df[1,2] <-centroids_df[1,2] *0.7
centroids_df[13,1] <-centroids_df[13,1] *0.7
centroids_df[13,2] <-centroids_df[13,2] *0.7
centroids_df$CAP1<-centroids_df$CAP1 * 1.5
centroids_df$CAP2<-centroids_df$CAP2 * 1.5
ggplot(vectors_df, aes(x = CAP1, y = CAP2))+
geom_point(aes(fill = DomClass_plot), pch = 21, size = 4)+
scale_fill_manual(values = rev(pal))+
theme_bw()+
# add arrows
geom_segment(data=centroids_df, aes(x = 0, y = 0, xend = CAP1*3, yend = CAP2*3),
arrow = arrow(length = unit(0.5, "cm"), type = "closed"), lwd = 1, col = "black")+
ggrepel::geom_label_repel(data=centroids_df,
alpha = 0.9, col = "black", size = 3, fill = "yellow",
#hjust = c(0,1),
#vjust = c(1,1),
aes(CAP1 *3, CAP2*3, label = Label))
### add species scores ###
### add species scores ###
### add species scores ###
### add species scores ###
species_scores<-data.frame(final_model_df$species)
hist(species_scores$CAP1, breaks = 50)
hist(species_scores$CAP2, breaks = 50)
summary(species_scores$CAP1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -7.010728 -0.000455 0.000018 -0.028936 0.000136 0.110956
summary(species_scores$CAP2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.8140492 -0.0010509 -0.0000452 -0.0242252 0.0000738 0.5422269
species_scores<-subset(species_scores, CAP1 < -1 | CAP2 > 0.2 | CAP2 < -0.2)
#species_scores$CAP1<-species_scores$CAP1*0.8
#species_scores$CAP2<-species_scores$CAP2*0.8
species_scores$ASV<-row.names(species_scores)
taxonomy<-data.frame(tax_table(scaled_core))
head(taxonomy)
taxonomy$ASV<-row.names(taxonomy)
species_scores<-merge(species_scores, taxonomy, by = "ASV")
subset(species_scores, Genus == "Lactococcus")
#################
species_scores[4,2]<-species_scores[4,2]*0.2
species_scores[4,3]<-species_scores[4,3]*0.2
species_scores[5,2]<-species_scores[5,2]*0.8
species_scores[5,3]<-species_scores[5,3]*0.8
species_scores<-species_scores[-6,]
species_scores<-subset(species_scores,
Genus == "Clostridium sensu stricto 1" |
Genus == "Blautia" |
Genus == "Fusobacterium" |
Genus == "Bacteroides" |
Genus == "Phascolarctobacterium" |
Genus == "Lactococcus" |
Genus == "Paeniclostridium" |
Genus == "[Ruminococcus] torques group")
species_scores[6,3]<-species_scores[6,3]*0.8
species_scores[6,2]<-species_scores[6,2]*0.8
###################
###################
###################
###################
ord_plot<-ggplot(vectors_df, aes(x = CAP1, y = CAP2))+
geom_point(aes(fill = DomClass_plot), pch = 21, size = 3)+
scale_fill_manual(values = rev(pal))+
theme_bw()+
xlab("CAP1 (40 %)")+ylab("CAP2 (14%)")+
guides(fill = guide_legend(override.aes = list(size = 7)))+
labs(fill = "Bacterial class")+
# add arrows
geom_segment(data=centroids_df, aes(x = 0, y = 0, xend = CAP1*3, yend = CAP2*3),
arrow = arrow(length = unit(0.5, "cm"), type = "closed"), lwd = 1, col = "black")+
ggrepel::geom_label_repel(data=centroids_df,
alpha = 0.9, col = "black", size = 3, fill = "yellow",
aes(CAP1 *3, CAP2*3, label = Label))+
# add species
geom_point(data = species_scores, size = 4, col = "black", pch = 8, stroke = 0, alpha = 0)+
ggrepel::geom_label_repel(data=species_scores,
alpha = 0.9, col = "black", size = 2.5, fill = "skyblue",
aes( label = Genus))
ggarrange(ord_plot,year_plot, ncol = 2 , common.legend = T, legend = "right", widths = c(3.5,2), labels = c("a)", NA))
#ggsave("Figures/Fig3.pdf")
#Saving 11.6 x 5.91 in image
This chunk took 0.07 minutes
Make an ‘other’ category for rare taxa
genus_ps<-tax_glom(meerkat_filtered, taxrank = "Genus")
genus_abundances_df<-data.frame(taxa_sums(genus_ps))
genus_abundances_df$ASV<-row.names(genus_abundances_df)
taxonomy<-data.frame(tax_table(genus_ps))
taxonomy$ASV<-row.names(taxonomy)
genus_abundances_df<-merge(genus_abundances_df, taxonomy, by = "ASV")
names(genus_abundances_df)[2]<-"Abundance"
genus_abundances_df<-genus_abundances_df %>% dplyr::arrange(-Abundance)
top_genera<- as.character(genus_abundances_df$ASV[1:30])
sample_data(genus_ps)<-sample_data(genus_ps)[,c( "feature.id")]
top_genera_melt<-psmelt(genus_ps)
# make a category for rare genera
top_genera_melt$Genus2<-ifelse(top_genera_melt$OTU %in% top_genera, top_genera_melt$Genus, "Rare taxa")
top_genera_melt2<-top_genera_melt %>% group_by(Sample, Genus2) %>% summarize(Abundance2 = sum(Abundance))
metadata<-data.frame(sample_data(meerkat_filtered))
top_genera_melt3<- merge(top_genera_melt2, metadata, by.x = "Sample", by.y = "feature.id")
names(top_genera_melt3)
## [1] "Sample" "Genus2" "Abundance2"
## [4] "IndividID" "SampleDate" "SampleTime"
## [7] "Storage" "Seq_run" "Imtechella"
## [10] "Allobacillus" "Seq_depth" "scaling_factor"
## [13] "Seq_depth_nospikein" "BirthDate" "DeathDate"
## [16] "AgeAtSampling" "GroupAtSampling" "Condition_weight"
## [19] "SocialStatus" "Temp_max" "sum_rainfall_month"
## [22] "HoursAfterSunrise" "TimeCat" "Year"
## [25] "month" "Season" "TB_status"
## [28] "TB_exposure" "TB_resistance" "TB_survival"
## [31] "TB_symptom_date" "TB_exposure_date" "TB_stage"
## [34] "TotalAbundance"
head(top_genera_melt3)
This chunk took 0.11 minutes
# loop to fit GAMMs to each genus
year_estimates<-list()
taxanames<-unique(top_genera_melt3$Genus2)
for (i in 1:length(taxanames)){
tryCatch({ #catch errors
print(i)
print(taxanames[i])
taxa1<-subset(top_genera_melt3, Genus2 == taxanames[i])
# taxa1<-subset(top_genera_melt3, Genus2 == "Rare taxa")
metadata<-taxa1
# hist(log10(metadata$AgeAtSampling))
# hist(log10(metadata$TotalAbundance))
# hist(metadata$Seq_depth_nospikein)
# scale variables
metadata$AgeAtSampling<-as.numeric(scale(log10(metadata$AgeAtSampling)))
metadata$TotalAbundance<-as.numeric(scale(log10(metadata$TotalAbundance)))
metadata$Seq_depth_nospikein<-as.numeric(scale(log10(metadata$Seq_depth_nospikein)))
# fit gamm
m_taxa <- mgcv::bam(log10(Abundance2+1)~
s(HoursAfterSunrise, bs = "cr")+
s(month, bs = "cc")+
s(AgeAtSampling, bs = "cr")+
s(Seq_depth_nospikein, bs = "cr")+
s(Year, bs = "cr", k = 5)+
s(TotalAbundance, bs = "cr")+
Storage+
Seq_run+
s(IndividID, bs = "re")+
s(GroupAtSampling, bs = "re"),
#correlation = corARMA(form = ~ 1|Year, p = 3),
data=metadata,
family = gaussian)
print(summary(m_taxa))
gam.check(m_taxa)
confint_df_year<-stats::confint(m_taxa, parm = "s(Year)", type = "confidence")
confint_df_year$Taxa<-taxanames[i]
confint_df<-confint_df_year
# convert into real estimate by adding the intercapt (= mean abundance)
confint_df$est_real<-confint_df$est+summary(m_taxa)$p.coeff[1]
confint_df$lower_real<- confint_df$lower+summary(m_taxa)$p.coeff[1]
confint_df$upper_real<- confint_df$upper+summary(m_taxa)$p.coeff[1]
## add effect size and p value
summary<-summary(m_taxa)
confint_df$effect_size<- summary$s.table[5,3]
confint_df$P_val<- summary$s.table[5,4]
year_estimates[[i]]<-confint_df
}, error=function(e){})
}
## [1] 1
## [1] "[Ruminococcus] torques group"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.16009 0.05302 59.603 <2e-16 ***
## StorageFROZEN -0.14738 0.07757 -1.900 0.0577 .
## Seq_runRUN2 0.05483 0.06743 0.813 0.4163
## Seq_runRUN3 0.14268 0.07035 2.028 0.0428 *
## Seq_runRUN4 0.15657 0.15048 1.040 0.2984
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.000e+00 1.000 3.442 0.06382 .
## s(month) 3.727e+00 8.000 1.192 0.02775 *
## s(AgeAtSampling) 2.644e+00 3.289 7.748 2.73e-05 ***
## s(Seq_depth_nospikein) 2.638e+00 3.373 1.592 0.24084
## s(Year) 2.234e+00 2.720 4.969 0.00451 **
## s(TotalAbundance) 4.237e+00 5.159 147.411 < 2e-16 ***
## s(IndividID) 1.134e+01 234.000 0.052 0.22592
## s(GroupAtSampling) 1.037e-04 41.000 0.000 0.46613
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.619 Deviance explained = 63%
## fREML = 1255.3 Scale est. = 0.5038 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 14 iterations.
## Gradient range [-2.213887e-06,5.168926e-06]
## (score 1255.34 & scale 0.5037998).
## Hessian positive definite, eigenvalue range [2.131302e-06,565.5707].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00e+00 1.00e+00 0.93 0.015 *
## s(month) 8.00e+00 3.73e+00 0.98 0.205
## s(AgeAtSampling) 9.00e+00 2.64e+00 1.01 0.545
## s(Seq_depth_nospikein) 9.00e+00 2.64e+00 0.97 0.105
## s(Year) 4.00e+00 2.23e+00 0.97 0.170
## s(TotalAbundance) 9.00e+00 4.24e+00 0.93 <2e-16 ***
## s(IndividID) 2.35e+02 1.13e+01 NA NA
## s(GroupAtSampling) 4.20e+01 1.04e-04 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 2
## [1] "Alloprevotella"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.84199 0.09407 8.951 <2e-16 ***
## StorageFROZEN 0.05472 0.11404 0.480 0.631
## Seq_runRUN2 0.11416 0.11538 0.989 0.323
## Seq_runRUN3 -0.05323 0.11832 -0.450 0.653
## Seq_runRUN4 -0.08494 0.18080 -0.470 0.639
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 3.01031 3.649 4.022 0.00636 **
## s(month) 0.06256 8.000 0.008 0.34233
## s(AgeAtSampling) 3.74623 4.532 8.324 6.83e-07 ***
## s(Seq_depth_nospikein) 1.00002 1.000 5.076 0.02446 *
## s(Year) 2.65771 3.128 10.148 1.50e-06 ***
## s(TotalAbundance) 4.37383 5.277 40.765 < 2e-16 ***
## s(IndividID) 59.05731 234.000 0.408 1.79e-05 ***
## s(GroupAtSampling) 7.68720 41.000 0.573 0.00444 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.428 Deviance explained = 47.1%
## fREML = 1635.1 Scale est. = 0.9264 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 14 iterations.
## Gradient range [-5.348667e-06,7.043528e-06]
## (score 1635.138 & scale 0.9264015).
## Hessian positive definite, eigenvalue range [5.34862e-06,567.1029].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.0000 3.0103 0.98 0.28
## s(month) 8.0000 0.0626 0.97 0.20
## s(AgeAtSampling) 9.0000 3.7462 0.97 0.14
## s(Seq_depth_nospikein) 9.0000 1.0000 0.93 <2e-16 ***
## s(Year) 4.0000 2.6577 0.97 0.09 .
## s(TotalAbundance) 9.0000 4.3738 1.01 0.65
## s(IndividID) 235.0000 59.0573 NA NA
## s(GroupAtSampling) 42.0000 7.6872 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 3
## [1] "Bacillus"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.14964 0.09511 12.088 <2e-16 ***
## StorageFROZEN -0.16268 0.12760 -1.275 0.203
## Seq_runRUN2 0.01438 0.11933 0.120 0.904
## Seq_runRUN3 -0.15825 0.12391 -1.277 0.202
## Seq_runRUN4 -0.24584 0.21424 -1.147 0.251
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 4.097 5.004 11.269 < 2e-16 ***
## s(month) 5.657 8.000 9.546 < 2e-16 ***
## s(AgeAtSampling) 2.655 3.288 3.452 0.01381 *
## s(Seq_depth_nospikein) 1.000 1.000 4.438 0.03537 *
## s(Year) 1.467 1.750 5.229 0.00649 **
## s(TotalAbundance) 1.000 1.000 0.128 0.72048
## s(IndividID) 23.244 234.000 0.115 0.10057
## s(GroupAtSampling) 7.010 41.000 0.338 0.02286 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.175 Deviance explained = 21.1%
## fREML = 1828 Scale est. = 1.3538 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 16 iterations.
## Gradient range [-7.876963e-06,3.826142e-06]
## (score 1828.015 & scale 1.353826).
## Hessian positive definite, eigenvalue range [1.562667e-06,565.7809].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00 4.10 1.03 0.830
## s(month) 8.00 5.66 0.93 0.005 **
## s(AgeAtSampling) 9.00 2.65 0.96 0.075 .
## s(Seq_depth_nospikein) 9.00 1.00 0.97 0.240
## s(Year) 4.00 1.47 0.88 <2e-16 ***
## s(TotalAbundance) 9.00 1.00 1.02 0.715
## s(IndividID) 235.00 23.24 NA NA
## s(GroupAtSampling) 42.00 7.01 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 4
## [1] "Bacteroides"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.21226 0.07292 30.338 <2e-16 ***
## StorageFROZEN -0.02333 0.11166 -0.209 0.8345
## Seq_runRUN2 0.19027 0.09276 2.051 0.0405 *
## Seq_runRUN3 0.23291 0.09474 2.458 0.0141 *
## Seq_runRUN4 0.28937 0.16815 1.721 0.0855 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.000e+00 1.000 8.821 0.00304 **
## s(month) 5.725e-01 8.000 0.090 0.27571
## s(AgeAtSampling) 1.658e+00 2.070 1.307 0.28459
## s(Seq_depth_nospikein) 1.000e+00 1.000 2.011 0.15648
## s(Year) 2.672e+00 3.210 15.806 < 2e-16 ***
## s(TotalAbundance) 4.422e+00 5.363 123.872 < 2e-16 ***
## s(IndividID) 3.463e+00 234.000 0.015 0.38893
## s(GroupAtSampling) 9.076e-06 41.000 0.000 0.88534
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.583 Deviance explained = 58.9%
## fREML = 1648.9 Scale est. = 1.028 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 15 iterations.
## Gradient range [-1.077949e-05,1.945813e-05]
## (score 1648.874 & scale 1.027997).
## Hessian positive definite, eigenvalue range [1.890004e-06,565.512].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00e+00 1.00e+00 0.97 0.20
## s(month) 8.00e+00 5.73e-01 0.97 0.12
## s(AgeAtSampling) 9.00e+00 1.66e+00 1.00 0.48
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00 0.96 0.08 .
## s(Year) 4.00e+00 2.67e+00 0.95 0.06 .
## s(TotalAbundance) 9.00e+00 4.42e+00 0.96 0.14
## s(IndividID) 2.35e+02 3.46e+00 NA NA
## s(GroupAtSampling) 4.20e+01 9.08e-06 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 5
## [1] "Blautia"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.38695 0.05409 62.622 < 2e-16 ***
## StorageFROZEN -0.33270 0.07220 -4.608 4.55e-06 ***
## Seq_runRUN2 0.05496 0.06701 0.820 0.4123
## Seq_runRUN3 0.12524 0.06949 1.802 0.0718 .
## Seq_runRUN4 0.05389 0.14904 0.362 0.7177
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.000 1.000 0.005 0.94171
## s(month) 2.789 8.000 1.931 0.00068 ***
## s(AgeAtSampling) 2.061 2.574 1.072 0.46836
## s(Seq_depth_nospikein) 3.200 4.010 2.410 0.04721 *
## s(Year) 1.000 1.000 0.044 0.83322
## s(TotalAbundance) 1.103 1.196 568.128 < 2e-16 ***
## s(IndividID) 17.424 234.000 0.084 0.14665
## s(GroupAtSampling) 8.223 41.000 0.431 0.00848 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.579 Deviance explained = 59.4%
## fREML = 1193.4 Scale est. = 0.44926 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 16 iterations.
## Gradient range [-6.576772e-06,1.473204e-05]
## (score 1193.422 & scale 0.4492574).
## Hessian positive definite, eigenvalue range [1.620849e-06,565.6705].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00 1.00 0.99 0.350
## s(month) 8.00 2.79 0.94 0.045 *
## s(AgeAtSampling) 9.00 2.06 1.00 0.480
## s(Seq_depth_nospikein) 9.00 3.20 1.00 0.470
## s(Year) 4.00 1.00 0.96 0.060 .
## s(TotalAbundance) 9.00 1.10 0.93 0.005 **
## s(IndividID) 235.00 17.42 NA NA
## s(GroupAtSampling) 42.00 8.22 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 6
## [1] "Catenisphaera"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.06294 0.06581 31.348 < 2e-16 ***
## StorageFROZEN -0.14159 0.08832 -1.603 0.10920
## Seq_runRUN2 0.07932 0.08552 0.928 0.35387
## Seq_runRUN3 0.26400 0.08928 2.957 0.00318 **
## Seq_runRUN4 0.35565 0.14159 2.512 0.01216 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.000e+00 1.000 0.040 0.840699
## s(month) 1.304e+00 8.000 0.306 0.146684
## s(AgeAtSampling) 5.431e+00 6.365 12.103 < 2e-16 ***
## s(Seq_depth_nospikein) 1.000e+00 1.000 0.084 0.771489
## s(Year) 1.527e+00 1.829 0.800 0.343250
## s(TotalAbundance) 3.946e+00 4.820 40.263 < 2e-16 ***
## s(IndividID) 4.490e+01 234.000 0.269 0.000905 ***
## s(GroupAtSampling) 3.758e-05 41.000 0.000 0.608065
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.309 Deviance explained = 34.7%
## fREML = 1413.4 Scale est. = 0.6442 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 14 iterations.
## Gradient range [-3.093541e-06,1.251552e-05]
## (score 1413.437 & scale 0.6442027).
## Hessian positive definite, eigenvalue range [1.990588e-06,566.4137].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00e+00 1.00e+00 1.00 0.415
## s(month) 8.00e+00 1.30e+00 0.96 0.095 .
## s(AgeAtSampling) 9.00e+00 5.43e+00 1.01 0.610
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00 0.99 0.435
## s(Year) 4.00e+00 1.53e+00 0.96 0.045 *
## s(TotalAbundance) 9.00e+00 3.95e+00 0.96 0.075 .
## s(IndividID) 2.35e+02 4.49e+01 NA NA
## s(GroupAtSampling) 4.20e+01 3.76e-05 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 7
## [1] "Clostridium sensu stricto 1"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.83482 0.05499 69.733 < 2e-16 ***
## StorageFROZEN 0.03648 0.08352 0.437 0.662328
## Seq_runRUN2 -0.27453 0.06910 -3.973 7.56e-05 ***
## Seq_runRUN3 -0.24930 0.07080 -3.521 0.000447 ***
## Seq_runRUN4 -0.40271 0.18070 -2.229 0.026043 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 4.942 5.967 20.996 <2e-16 ***
## s(month) 1.991 8.000 0.902 0.0121 *
## s(AgeAtSampling) 1.000 1.000 0.012 0.9117
## s(Seq_depth_nospikein) 3.484 4.336 2.864 0.0238 *
## s(Year) 2.232 2.722 2.222 0.1183
## s(TotalAbundance) 5.816 6.763 133.951 <2e-16 ***
## s(IndividID) 5.572 234.000 0.025 0.3659
## s(GroupAtSampling) 1.584 41.000 0.045 0.2788
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.712 Deviance explained = 72%
## fREML = 1347.3 Scale est. = 0.58937 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 10 iterations.
## Gradient range [-3.404026e-06,6.68899e-06]
## (score 1347.279 & scale 0.5893748).
## Hessian positive definite, eigenvalue range [3.403997e-06,565.5371].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00 4.94 1.01 0.625
## s(month) 8.00 1.99 0.94 0.020 *
## s(AgeAtSampling) 9.00 1.00 1.03 0.840
## s(Seq_depth_nospikein) 9.00 3.48 1.01 0.655
## s(Year) 4.00 2.23 0.95 0.035 *
## s(TotalAbundance) 9.00 5.82 0.99 0.335
## s(IndividID) 235.00 5.57 NA NA
## s(GroupAtSampling) 42.00 1.58 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 8
## [1] "Collinsella"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.28429 0.07845 29.117 < 2e-16 ***
## StorageFROZEN -0.34675 0.09940 -3.488 0.000505 ***
## Seq_runRUN2 0.48959 0.09173 5.337 1.15e-07 ***
## Seq_runRUN3 0.73036 0.09546 7.651 4.33e-14 ***
## Seq_runRUN4 0.90716 0.18257 4.969 7.80e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 3.0061 3.689 1.327 0.22705
## s(month) 3.1633 8.000 2.602 5.55e-05 ***
## s(AgeAtSampling) 2.7874 3.463 1.662 0.16798
## s(Seq_depth_nospikein) 1.6920 2.161 0.086 0.91239
## s(Year) 1.0000 1.000 9.332 0.00231 **
## s(TotalAbundance) 5.0041 5.966 35.716 < 2e-16 ***
## s(IndividID) 0.3996 234.000 0.002 0.46454
## s(GroupAtSampling) 13.0765 41.000 0.945 7.12e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.309 Deviance explained = 33%
## fREML = 1560.1 Scale est. = 0.85559 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 14 iterations.
## Gradient range [-3.06861e-06,4.964849e-07]
## (score 1560.124 & scale 0.8555855).
## Hessian positive definite, eigenvalue range [3.068596e-06,565.5911].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00 3.01 1.03 0.83
## s(month) 8.00 3.16 0.89 <2e-16 ***
## s(AgeAtSampling) 9.00 2.79 0.97 0.13
## s(Seq_depth_nospikein) 9.00 1.69 1.00 0.42
## s(Year) 4.00 1.00 0.90 <2e-16 ***
## s(TotalAbundance) 9.00 5.00 0.95 0.05 *
## s(IndividID) 235.00 0.40 NA NA
## s(GroupAtSampling) 42.00 13.08 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 9
## [1] "Enterococcus"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.278504 0.059169 55.409 < 2e-16 ***
## StorageFROZEN -0.285034 0.079082 -3.604 0.000327 ***
## Seq_runRUN2 -0.024265 0.075947 -0.319 0.749415
## Seq_runRUN3 0.019965 0.078801 0.253 0.800036
## Seq_runRUN4 -0.006933 0.130806 -0.053 0.957739
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 3.023 3.694 11.164 < 2e-16 ***
## s(month) 2.814 8.000 1.937 0.000509 ***
## s(AgeAtSampling) 3.109 3.818 8.445 2.38e-06 ***
## s(Seq_depth_nospikein) 1.000 1.000 0.277 0.598937
## s(Year) 1.000 1.000 11.925 0.000575 ***
## s(TotalAbundance) 3.255 4.037 126.241 < 2e-16 ***
## s(IndividID) 33.616 234.000 0.180 0.025853 *
## s(GroupAtSampling) 4.922 41.000 0.178 0.170204
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.474 Deviance explained = 50%
## fREML = 1294.1 Scale est. = 0.5268 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-4.166439e-06,3.280395e-06]
## (score 1294.116 & scale 0.5268041).
## Hessian positive definite, eigenvalue range [1.469094e-06,566.0221].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00 3.02 1.02 0.755
## s(month) 8.00 2.81 0.99 0.410
## s(AgeAtSampling) 9.00 3.11 1.01 0.630
## s(Seq_depth_nospikein) 9.00 1.00 0.98 0.205
## s(Year) 4.00 1.00 0.96 0.085 .
## s(TotalAbundance) 9.00 3.25 1.00 0.465
## s(IndividID) 235.00 33.62 NA NA
## s(GroupAtSampling) 42.00 4.92 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 10
## [1] "Escherichia-Shigella"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.15855 0.11140 10.400 <2e-16 ***
## StorageFROZEN -0.13684 0.13304 -1.029 0.304
## Seq_runRUN2 0.04999 0.12218 0.409 0.683
## Seq_runRUN3 0.08094 0.12406 0.652 0.514
## Seq_runRUN4 0.18356 0.21418 0.857 0.392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 2.859 3.492 3.921 0.00803 **
## s(month) 1.372 8.000 0.369 0.10559
## s(AgeAtSampling) 1.000 1.000 9.195 0.00248 **
## s(Seq_depth_nospikein) 1.000 1.000 4.445 0.03523 *
## s(Year) 1.257 1.442 2.870 0.13666
## s(TotalAbundance) 3.938 4.824 32.140 < 2e-16 ***
## s(IndividID) 13.125 234.000 0.063 0.19657
## s(GroupAtSampling) 16.766 41.000 1.902 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.291 Deviance explained = 31.9%
## fREML = 1864.5 Scale est. = 1.4531 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-4.267278e-06,3.164214e-06]
## (score 1864.493 & scale 1.453113).
## Hessian positive definite, eigenvalue range [2.358056e-06,565.7079].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00 2.86 0.99 0.26
## s(month) 8.00 1.37 0.91 <2e-16 ***
## s(AgeAtSampling) 9.00 1.00 0.98 0.25
## s(Seq_depth_nospikein) 9.00 1.00 0.97 0.16
## s(Year) 4.00 1.26 0.92 <2e-16 ***
## s(TotalAbundance) 9.00 3.94 1.02 0.67
## s(IndividID) 235.00 13.12 NA NA
## s(GroupAtSampling) 42.00 16.77 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 11
## [1] "Fusobacterium"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.913430 0.078812 24.278 <2e-16 ***
## StorageFROZEN 0.270403 0.109012 2.480 0.0133 *
## Seq_runRUN2 0.101523 0.099840 1.017 0.3094
## Seq_runRUN3 0.181202 0.102646 1.765 0.0778 .
## Seq_runRUN4 -0.008187 0.174012 -0.047 0.9625
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.000e+00 1.00 0.667 0.4143
## s(month) 3.980e-06 8.00 0.000 0.4522
## s(AgeAtSampling) 1.466e+00 1.79 2.026 0.2189
## s(Seq_depth_nospikein) 1.000e+00 1.00 3.565 0.0593 .
## s(Year) 1.000e+00 1.00 44.880 <2e-16 ***
## s(TotalAbundance) 4.553e+00 5.49 85.231 <2e-16 ***
## s(IndividID) 1.819e+01 234.00 0.090 0.1073
## s(GroupAtSampling) 7.247e+00 41.00 0.339 0.0249 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.489 Deviance explained = 50.6%
## fREML = 1668.4 Scale est. = 1.0436 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 15 iterations.
## Gradient range [-4.28469e-06,5.70973e-06]
## (score 1668.442 & scale 1.04357).
## Hessian positive definite, eigenvalue range [3.798488e-07,565.6756].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00e+00 1.00e+00 0.98 0.22
## s(month) 8.00e+00 3.98e-06 0.90 <2e-16 ***
## s(AgeAtSampling) 9.00e+00 1.47e+00 0.98 0.20
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00 0.98 0.24
## s(Year) 4.00e+00 1.00e+00 0.89 <2e-16 ***
## s(TotalAbundance) 9.00e+00 4.55e+00 0.97 0.16
## s(IndividID) 2.35e+02 1.82e+01 NA NA
## s(GroupAtSampling) 4.20e+01 7.25e+00 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 12
## [1] "Geodermatophilus"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.180445 0.087791 24.837 < 2e-16 ***
## StorageFROZEN -0.295817 0.094428 -3.133 0.00178 **
## Seq_runRUN2 0.018892 0.088097 0.214 0.83024
## Seq_runRUN3 0.001402 0.090936 0.015 0.98771
## Seq_runRUN4 0.152551 0.198734 0.768 0.44288
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 3.508 4.278 4.803 0.000719 ***
## s(month) 1.729 8.000 0.627 0.047863 *
## s(AgeAtSampling) 1.755 2.189 1.364 0.253926
## s(Seq_depth_nospikein) 3.526 4.373 1.668 0.143022
## s(Year) 1.726 2.104 0.388 0.624677
## s(TotalAbundance) 3.711 4.573 5.993 4.16e-05 ***
## s(IndividID) 8.983 234.000 0.041 0.278556
## s(GroupAtSampling) 20.209 41.000 2.426 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.142 Deviance explained = 17.9%
## fREML = 1466.2 Scale est. = 0.70983 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 12 iterations.
## Gradient range [-6.279772e-07,3.761228e-06]
## (score 1466.224 & scale 0.7098257).
## Hessian positive definite, eigenvalue range [0.09038299,565.7287].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00 3.51 0.98 0.205
## s(month) 8.00 1.73 0.93 0.005 **
## s(AgeAtSampling) 9.00 1.75 0.99 0.290
## s(Seq_depth_nospikein) 9.00 3.53 1.03 0.845
## s(Year) 4.00 1.73 0.91 <2e-16 ***
## s(TotalAbundance) 9.00 3.71 1.02 0.720
## s(IndividID) 235.00 8.98 NA NA
## s(GroupAtSampling) 42.00 20.21 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 13
## [1] "Helicobacter"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.27899 0.06774 18.881 <2e-16 ***
## StorageFROZEN 0.09823 0.09459 1.038 0.299
## Seq_runRUN2 0.02925 0.08574 0.341 0.733
## Seq_runRUN3 -0.03289 0.08707 -0.378 0.706
## Seq_runRUN4 0.10232 0.15224 0.672 0.502
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 3.3386 4.054 6.999 1.46e-05 ***
## s(month) 0.8524 8.000 0.166 0.19873
## s(AgeAtSampling) 1.0000 1.000 10.103 0.00152 **
## s(Seq_depth_nospikein) 1.0000 1.000 2.039 0.15364
## s(Year) 1.9995 2.417 13.132 6.28e-07 ***
## s(TotalAbundance) 4.1493 5.046 77.050 < 2e-16 ***
## s(IndividID) 37.3367 234.000 0.214 0.00509 **
## s(GroupAtSampling) 4.0853 41.000 0.152 0.14830
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.509 Deviance explained = 53.4%
## fREML = 1467.6 Scale est. = 0.71607 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 14 iterations.
## Gradient range [-6.201009e-06,6.024533e-06]
## (score 1467.63 & scale 0.7160725).
## Hessian positive definite, eigenvalue range [3.802967e-06,566.1358].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.000 3.339 0.99 0.40
## s(month) 8.000 0.852 0.99 0.41
## s(AgeAtSampling) 9.000 1.000 1.00 0.46
## s(Seq_depth_nospikein) 9.000 1.000 0.98 0.21
## s(Year) 4.000 2.000 1.00 0.52
## s(TotalAbundance) 9.000 4.149 1.02 0.74
## s(IndividID) 235.000 37.337 NA NA
## s(GroupAtSampling) 42.000 4.085 NA NA
## [1] 14
## [1] "Lachnoclostridium"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.93416 0.05215 56.263 <2e-16 ***
## StorageFROZEN -0.04383 0.06739 -0.650 0.516
## Seq_runRUN2 0.08676 0.06295 1.378 0.168
## Seq_runRUN3 0.10591 0.06575 1.611 0.108
## Seq_runRUN4 -0.04891 0.13412 -0.365 0.715
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.000e+00 1.000 1.472 0.225288
## s(month) 7.714e-05 8.000 0.000 0.389100
## s(AgeAtSampling) 3.088e+00 3.807 23.713 < 2e-16 ***
## s(Seq_depth_nospikein) 2.736e+00 3.484 1.650 0.215014
## s(Year) 1.516e+00 1.825 2.829 0.126870
## s(TotalAbundance) 1.885e+00 2.374 227.034 < 2e-16 ***
## s(IndividID) 1.024e+01 234.000 0.047 0.260105
## s(GroupAtSampling) 1.033e+01 41.000 0.620 0.000855 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.528 Deviance explained = 54.3%
## fREML = 1109.9 Scale est. = 0.38998 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 16 iterations.
## Gradient range [-2.120786e-06,3.035296e-06]
## (score 1109.937 & scale 0.3899831).
## Hessian positive definite, eigenvalue range [1.576529e-06,565.5975].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00e+00 1.00e+00 0.97 0.180
## s(month) 8.00e+00 7.71e-05 1.03 0.770
## s(AgeAtSampling) 9.00e+00 3.09e+00 1.03 0.850
## s(Seq_depth_nospikein) 9.00e+00 2.74e+00 0.97 0.105
## s(Year) 4.00e+00 1.52e+00 1.05 0.965
## s(TotalAbundance) 9.00e+00 1.88e+00 0.94 0.035 *
## s(IndividID) 2.35e+02 1.02e+01 NA NA
## s(GroupAtSampling) 4.20e+01 1.03e+01 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 15
## [1] "Lachnospiraceae NK4A136 group"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.20101 0.07478 16.061 < 2e-16 ***
## StorageFROZEN -0.06190 0.10840 -0.571 0.56809
## Seq_runRUN2 0.26240 0.09849 2.664 0.00783 **
## Seq_runRUN3 0.30582 0.10300 2.969 0.00305 **
## Seq_runRUN4 0.04675 0.17507 0.267 0.78948
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.0000 1.000 0.767 0.38130
## s(month) 1.0440 8.000 0.219 0.17638
## s(AgeAtSampling) 2.9452 3.642 3.330 0.01309 *
## s(Seq_depth_nospikein) 1.0000 1.000 7.796 0.00533 **
## s(Year) 1.0000 1.000 9.643 0.00195 **
## s(TotalAbundance) 4.1793 5.092 55.161 < 2e-16 ***
## s(IndividID) 16.5895 234.000 0.080 0.14583
## s(GroupAtSampling) 0.5576 41.000 0.014 0.38056
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.382 Deviance explained = 40%
## fREML = 1676.5 Scale est. = 1.0661 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-5.099451e-06,1.992226e-06]
## (score 1676.5 & scale 1.066101).
## Hessian positive definite, eigenvalue range [1.620128e-06,565.6286].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.000 1.000 0.96 0.085 .
## s(month) 8.000 1.044 0.91 0.005 **
## s(AgeAtSampling) 9.000 2.945 1.03 0.820
## s(Seq_depth_nospikein) 9.000 1.000 0.96 0.110
## s(Year) 4.000 1.000 0.91 <2e-16 ***
## s(TotalAbundance) 9.000 4.179 1.03 0.800
## s(IndividID) 235.000 16.590 NA NA
## s(GroupAtSampling) 42.000 0.558 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 16
## [1] "Lactococcus"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.89378 0.11357 16.675 < 2e-16 ***
## StorageFROZEN -0.42105 0.15040 -2.799 0.00521 **
## Seq_runRUN2 0.03619 0.12877 0.281 0.77870
## Seq_runRUN3 -0.06716 0.12896 -0.521 0.60260
## Seq_runRUN4 -0.06003 0.23638 -0.254 0.79958
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.000e+00 1.000 1.994 0.158157
## s(month) 4.166e+00 8.000 2.890 0.000109 ***
## s(AgeAtSampling) 1.432e+00 1.746 16.129 2.4e-05 ***
## s(Seq_depth_nospikein) 1.370e+00 1.667 0.147 0.830767
## s(Year) 3.373e+00 3.776 11.309 < 2e-16 ***
## s(TotalAbundance) 3.119e+00 3.894 13.486 < 2e-16 ***
## s(IndividID) 1.249e-04 234.000 0.000 0.628088
## s(GroupAtSampling) 1.305e+01 41.000 0.788 0.000170 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.177 Deviance explained = 20%
## fREML = 1933.8 Scale est. = 1.6628 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 21 iterations.
## Gradient range [-2.868307e-06,2.005942e-05]
## (score 1933.837 & scale 1.662795).
## Hessian positive definite, eigenvalue range [2.476436e-06,565.588].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00e+00 1.00e+00 1.03 0.845
## s(month) 8.00e+00 4.17e+00 0.94 0.010 **
## s(AgeAtSampling) 9.00e+00 1.43e+00 0.97 0.125
## s(Seq_depth_nospikein) 9.00e+00 1.37e+00 1.04 0.915
## s(Year) 4.00e+00 3.37e+00 0.94 0.015 *
## s(TotalAbundance) 9.00e+00 3.12e+00 1.05 0.965
## s(IndividID) 2.35e+02 1.25e-04 NA NA
## s(GroupAtSampling) 4.20e+01 1.30e+01 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 17
## [1] "Marvinbryantia"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.13559 0.08924 23.932 <2e-16 ***
## StorageFROZEN -0.25773 0.10691 -2.411 0.0161 *
## Seq_runRUN2 0.05836 0.10226 0.571 0.5684
## Seq_runRUN3 0.22162 0.10575 2.096 0.0363 *
## Seq_runRUN4 -0.05776 0.17452 -0.331 0.7407
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.000e+00 1.000 16.195 6.18e-05 ***
## s(month) 2.224e-05 8.000 0.000 0.8738
## s(AgeAtSampling) 3.551e+00 4.332 18.484 < 2e-16 ***
## s(Seq_depth_nospikein) 1.293e+00 1.534 2.438 0.1811
## s(Year) 2.300e+00 2.766 1.769 0.2280
## s(TotalAbundance) 3.598e+00 4.435 61.258 < 2e-16 ***
## s(IndividID) 2.142e+01 234.000 0.111 0.0611 .
## s(GroupAtSampling) 1.418e+01 41.000 1.126 1.59e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.475 Deviance explained = 49.8%
## fREML = 1588.1 Scale est. = 0.8876 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 12 iterations.
## Gradient range [-8.95451e-06,2.1666e-06]
## (score 1588.072 & scale 0.8875976).
## Hessian positive definite, eigenvalue range [1.87015e-06,565.7994].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00e+00 1.00e+00 1.01 0.55
## s(month) 8.00e+00 2.22e-05 0.98 0.23
## s(AgeAtSampling) 9.00e+00 3.55e+00 1.05 0.94
## s(Seq_depth_nospikein) 9.00e+00 1.29e+00 0.99 0.40
## s(Year) 4.00e+00 2.30e+00 0.98 0.26
## s(TotalAbundance) 9.00e+00 3.60e+00 0.97 0.14
## s(IndividID) 2.35e+02 2.14e+01 NA NA
## s(GroupAtSampling) 4.20e+01 1.42e+01 NA NA
## [1] 18
## [1] "Paeniclostridium"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.49694 0.10139 24.627 <2e-16 ***
## StorageFROZEN 0.04597 0.14341 0.321 0.7486
## Seq_runRUN2 -0.25263 0.12579 -2.008 0.0448 *
## Seq_runRUN3 -0.19471 0.12887 -1.511 0.1311
## Seq_runRUN4 -0.09860 0.23997 -0.411 0.6812
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 4.1222 5.033 17.306 < 2e-16 ***
## s(month) 0.1845 8.000 0.025 0.3283
## s(AgeAtSampling) 1.1339 1.253 17.333 3.35e-05 ***
## s(Seq_depth_nospikein) 1.0000 1.000 0.384 0.5356
## s(Year) 1.5779 1.916 0.480 0.6064
## s(TotalAbundance) 3.2783 4.077 27.978 < 2e-16 ***
## s(IndividID) 6.7377 234.000 0.030 0.3295
## s(GroupAtSampling) 8.1144 41.000 0.388 0.0114 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.33 Deviance explained = 34.7%
## fREML = 1967.6 Scale est. = 1.7825 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 11 iterations.
## Gradient range [-1.156708e-05,3.012972e-06]
## (score 1967.551 & scale 1.782474).
## Hessian positive definite, eigenvalue range [1.156693e-05,565.5561].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.000 4.122 1.04 0.895
## s(month) 8.000 0.184 0.97 0.105
## s(AgeAtSampling) 9.000 1.134 1.04 0.920
## s(Seq_depth_nospikein) 9.000 1.000 1.00 0.450
## s(Year) 4.000 1.578 0.94 0.025 *
## s(TotalAbundance) 9.000 3.278 1.03 0.870
## s(IndividID) 235.000 6.738 NA NA
## s(GroupAtSampling) 42.000 8.114 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 19
## [1] "Parabacteroides"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.07191 0.06717 15.958 <2e-16 ***
## StorageFROZEN 0.04670 0.09775 0.478 0.633
## Seq_runRUN2 0.09750 0.08509 1.146 0.252
## Seq_runRUN3 0.10732 0.08664 1.239 0.216
## Seq_runRUN4 -0.02433 0.17493 -0.139 0.889
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 3.533 4.309 5.505 0.000172 ***
## s(month) 1.970 8.000 0.783 0.023550 *
## s(AgeAtSampling) 1.000 1.000 0.017 0.897581
## s(Seq_depth_nospikein) 1.643 2.082 4.828 0.007821 **
## s(Year) 2.081 2.525 15.737 < 2e-16 ***
## s(TotalAbundance) 4.525 5.457 71.820 < 2e-16 ***
## s(IndividID) 23.145 234.000 0.116 0.077786 .
## s(GroupAtSampling) 3.190 41.000 0.111 0.153944
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.507 Deviance explained = 52.7%
## fREML = 1512.1 Scale est. = 0.78401 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 14 iterations.
## Gradient range [-7.820569e-06,1.601191e-06]
## (score 1512.073 & scale 0.7840062).
## Hessian positive definite, eigenvalue range [7.820453e-06,565.7527].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00 3.53 1.02 0.705
## s(month) 8.00 1.97 0.92 <2e-16 ***
## s(AgeAtSampling) 9.00 1.00 1.00 0.520
## s(Seq_depth_nospikein) 9.00 1.64 1.02 0.700
## s(Year) 4.00 2.08 0.93 0.010 **
## s(TotalAbundance) 9.00 4.53 0.96 0.085 .
## s(IndividID) 235.00 23.14 NA NA
## s(GroupAtSampling) 42.00 3.19 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 20
## [1] "Pediococcus"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.93587 0.09576 9.773 < 2e-16 ***
## StorageFROZEN -0.40785 0.11925 -3.420 0.000649 ***
## Seq_runRUN2 -0.01448 0.10858 -0.133 0.893914
## Seq_runRUN3 -0.08459 0.11032 -0.767 0.443416
## Seq_runRUN4 -0.05155 0.20586 -0.250 0.802314
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 5.281 6.313 1.792 0.093559 .
## s(month) 2.461 8.000 1.612 0.001556 **
## s(AgeAtSampling) 1.000 1.000 1.254 0.263104
## s(Seq_depth_nospikein) 1.000 1.000 0.000 0.984903
## s(Year) 1.000 1.000 12.398 0.000447 ***
## s(TotalAbundance) 1.000 1.000 4.576 0.032633 *
## s(IndividID) 11.219 234.000 0.053 0.218066
## s(GroupAtSampling) 14.991 41.000 0.959 0.000157 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0938 Deviance explained = 12.7%
## fREML = 1750.3 Scale est. = 1.19 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-7.020629e-06,1.941744e-05]
## (score 1750.298 & scale 1.189963).
## Hessian positive definite, eigenvalue range [1.940113e-06,565.6663].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00 5.28 0.96 0.085 .
## s(month) 8.00 2.46 0.97 0.150
## s(AgeAtSampling) 9.00 1.00 0.98 0.275
## s(Seq_depth_nospikein) 9.00 1.00 1.04 0.875
## s(Year) 4.00 1.00 0.96 0.100 .
## s(TotalAbundance) 9.00 1.00 1.01 0.540
## s(IndividID) 235.00 11.22 NA NA
## s(GroupAtSampling) 42.00 14.99 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 21
## [1] "Peptoclostridium"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.79789 0.12575 6.345 3.3e-10 ***
## StorageFROZEN -0.24745 0.13333 -1.856 0.0637 .
## Seq_runRUN2 0.02926 0.13279 0.220 0.8257
## Seq_runRUN3 0.10867 0.13293 0.818 0.4138
## Seq_runRUN4 0.10022 0.20244 0.495 0.6207
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.000e+00 1.000 0.951 0.329662
## s(month) 9.156e-06 8.000 0.000 0.641092
## s(AgeAtSampling) 1.000e+00 1.000 1.351 0.245321
## s(Seq_depth_nospikein) 1.000e+00 1.000 0.864 0.352836
## s(Year) 2.903e+00 3.361 10.054 6.39e-07 ***
## s(TotalAbundance) 2.479e+00 3.100 12.843 < 2e-16 ***
## s(IndividID) 5.494e+01 234.000 0.381 0.000306 ***
## s(GroupAtSampling) 1.902e+01 41.000 3.871 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.252 Deviance explained = 30.9%
## fREML = 1789.8 Scale est. = 1.2121 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 16 iterations.
## Gradient range [-6.619446e-06,7.861149e-06]
## (score 1789.839 & scale 1.212074).
## Hessian positive definite, eigenvalue range [1.763774e-06,567.0145].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00e+00 1.00e+00 1.03 0.77
## s(month) 8.00e+00 9.16e-06 0.93 <2e-16 ***
## s(AgeAtSampling) 9.00e+00 1.00e+00 1.01 0.68
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00 1.05 0.96
## s(Year) 4.00e+00 2.90e+00 0.93 <2e-16 ***
## s(TotalAbundance) 9.00e+00 2.48e+00 1.02 0.67
## s(IndividID) 2.35e+02 5.49e+01 NA NA
## s(GroupAtSampling) 4.20e+01 1.90e+01 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 22
## [1] "Peptococcus"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.25520 0.07706 29.266 <2e-16 ***
## StorageFROZEN -0.08868 0.10000 -0.887 0.375
## Seq_runRUN2 -0.04861 0.09686 -0.502 0.616
## Seq_runRUN3 0.13087 0.10042 1.303 0.193
## Seq_runRUN4 -0.12682 0.15630 -0.811 0.417
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.000e+00 1.000 1.122 0.2898
## s(month) 4.582e-06 8.000 0.000 0.7422
## s(AgeAtSampling) 4.146e+00 4.990 28.706 <2e-16 ***
## s(Seq_depth_nospikein) 1.000e+00 1.000 5.087 0.0243 *
## s(Year) 2.082e+00 2.510 4.013 0.0114 *
## s(TotalAbundance) 1.746e+00 2.184 116.941 <2e-16 ***
## s(IndividID) 3.876e+01 234.000 0.215 0.0131 *
## s(GroupAtSampling) 5.695e+00 41.000 0.295 0.0255 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.435 Deviance explained = 46.4%
## fREML = 1525.3 Scale est. = 0.79314 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 15 iterations.
## Gradient range [-5.553717e-06,6.157545e-07]
## (score 1525.284 & scale 0.7931389).
## Hessian positive definite, eigenvalue range [1.494603e-06,566.1876].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00e+00 1.00e+00 1.02 0.76
## s(month) 8.00e+00 4.58e-06 1.01 0.64
## s(AgeAtSampling) 9.00e+00 4.15e+00 1.03 0.84
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00 0.98 0.27
## s(Year) 4.00e+00 2.08e+00 1.01 0.60
## s(TotalAbundance) 9.00e+00 1.75e+00 0.99 0.41
## s(IndividID) 2.35e+02 3.88e+01 NA NA
## s(GroupAtSampling) 4.20e+01 5.70e+00 NA NA
## [1] 23
## [1] "Phascolarctobacterium"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.893665 0.068679 27.573 <2e-16 ***
## StorageFROZEN 0.064789 0.095786 0.676 0.499
## Seq_runRUN2 0.025142 0.089070 0.282 0.778
## Seq_runRUN3 -0.004265 0.093288 -0.046 0.964
## Seq_runRUN4 -0.026858 0.157739 -0.170 0.865
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.000e+00 1.000 10.139 0.00149 **
## s(month) 1.866e-05 8.000 0.000 0.52490
## s(AgeAtSampling) 4.458e+00 5.353 6.233 9.3e-06 ***
## s(Seq_depth_nospikein) 1.099e+00 1.190 1.042 0.36634
## s(Year) 1.000e+00 1.000 32.143 < 2e-16 ***
## s(TotalAbundance) 4.470e+00 5.404 110.006 < 2e-16 ***
## s(IndividID) 1.609e+01 234.000 0.078 0.13802
## s(GroupAtSampling) 3.407e+00 41.000 0.120 0.13560
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.557 Deviance explained = 57.1%
## fREML = 1535.7 Scale est. = 0.82549 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-5.83792e-06,8.627223e-06]
## (score 1535.702 & scale 0.8254942).
## Hessian positive definite, eigenvalue range [1.786395e-06,565.6303].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00e+00 1.00e+00 0.98 0.180
## s(month) 8.00e+00 1.87e-05 0.97 0.170
## s(AgeAtSampling) 9.00e+00 4.46e+00 0.99 0.280
## s(Seq_depth_nospikein) 9.00e+00 1.10e+00 0.97 0.155
## s(Year) 4.00e+00 1.00e+00 0.94 0.025 *
## s(TotalAbundance) 9.00e+00 4.47e+00 0.94 0.025 *
## s(IndividID) 2.35e+02 1.61e+01 NA NA
## s(GroupAtSampling) 4.20e+01 3.41e+00 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 24
## [1] "Rare taxa"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.60894 0.02589 139.409 < 2e-16 ***
## StorageFROZEN -0.04196 0.03996 -1.050 0.294
## Seq_runRUN2 0.14696 0.03301 4.453 9.34e-06 ***
## Seq_runRUN3 0.13124 0.03353 3.914 9.64e-05 ***
## Seq_runRUN4 0.09722 0.06336 1.534 0.125
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 3.617e+00 4.427 5.210 0.000248 ***
## s(month) 2.299e+00 8.000 1.369 0.001935 **
## s(AgeAtSampling) 1.234e+00 1.429 0.945 0.462304
## s(Seq_depth_nospikein) 1.000e+00 1.000 0.117 0.732247
## s(Year) 2.541e+00 3.067 7.217 6.7e-05 ***
## s(TotalAbundance) 5.598e+00 6.555 123.943 < 2e-16 ***
## s(IndividID) 7.835e+00 234.000 0.035 0.303405
## s(GroupAtSampling) 1.642e-05 41.000 0.000 0.571876
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.568 Deviance explained = 57.8%
## fREML = 491.92 Scale est. = 0.13077 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 16 iterations.
## Gradient range [-2.629817e-06,1.682781e-06]
## (score 491.9209 & scale 0.1307729).
## Hessian positive definite, eigenvalue range [1.124065e-06,565.543].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00e+00 3.62e+00 1.07 0.99
## s(month) 8.00e+00 2.30e+00 0.95 0.05 *
## s(AgeAtSampling) 9.00e+00 1.23e+00 1.01 0.62
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00 1.02 0.75
## s(Year) 4.00e+00 2.54e+00 0.93 0.02 *
## s(TotalAbundance) 9.00e+00 5.60e+00 0.99 0.41
## s(IndividID) 2.35e+02 7.83e+00 NA NA
## s(GroupAtSampling) 4.20e+01 1.64e-05 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 25
## [1] "Rikenellaceae RC9 gut group"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.28901 0.07144 18.042 <2e-16 ***
## StorageFROZEN -0.02868 0.10234 -0.280 0.779
## Seq_runRUN2 0.12756 0.09049 1.410 0.159
## Seq_runRUN3 0.13899 0.09317 1.492 0.136
## Seq_runRUN4 0.07412 0.15965 0.464 0.643
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 3.315e+00 4.047 4.897 0.000666 ***
## s(month) 2.044e+00 8.000 0.734 0.032035 *
## s(AgeAtSampling) 3.853e+00 4.681 16.251 < 2e-16 ***
## s(Seq_depth_nospikein) 1.000e+00 1.000 5.281 0.021752 *
## s(Year) 2.830e+00 3.344 9.646 1.53e-06 ***
## s(TotalAbundance) 4.176e+00 5.085 90.727 < 2e-16 ***
## s(IndividID) 2.135e+01 234.000 0.107 0.075758 .
## s(GroupAtSampling) 1.554e-04 41.000 0.000 0.456840
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.543 Deviance explained = 56%
## fREML = 1529.7 Scale est. = 0.80878 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 17 iterations.
## Gradient range [-2.59411e-06,1.416098e-05]
## (score 1529.662 & scale 0.808778).
## Hessian positive definite, eigenvalue range [1.985874e-06,565.7159].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00e+00 3.32e+00 1.04 0.915
## s(month) 8.00e+00 2.04e+00 0.97 0.095 .
## s(AgeAtSampling) 9.00e+00 3.85e+00 1.00 0.455
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00 1.01 0.635
## s(Year) 4.00e+00 2.83e+00 0.97 0.130
## s(TotalAbundance) 9.00e+00 4.18e+00 1.01 0.575
## s(IndividID) 2.35e+02 2.14e+01 NA NA
## s(GroupAtSampling) 4.20e+01 1.55e-04 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 26
## [1] "Romboutsia"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.38755 0.10426 22.900 < 2e-16 ***
## StorageFROZEN 0.17059 0.14851 1.149 0.25094
## Seq_runRUN2 -0.40537 0.12555 -3.229 0.00128 **
## Seq_runRUN3 -0.28043 0.12697 -2.209 0.02740 *
## Seq_runRUN4 -0.05848 0.26834 -0.218 0.82753
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 2.54353 3.142 3.947 0.00746 **
## s(month) 1.93710 8.000 0.794 0.01854 *
## s(AgeAtSampling) 1.66523 2.079 16.882 < 2e-16 ***
## s(Seq_depth_nospikein) 2.27958 2.942 0.535 0.60366
## s(Year) 3.17335 3.645 4.945 0.00158 **
## s(TotalAbundance) 3.81355 4.695 32.660 < 2e-16 ***
## s(IndividID) 0.07013 234.000 0.000 0.46926
## s(GroupAtSampling) 7.32084 41.000 0.311 0.02639 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.305 Deviance explained = 32.2%
## fREML = 1935.8 Scale est. = 1.6875 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 19 iterations.
## Gradient range [-1.624929e-08,7.372425e-07]
## (score 1935.836 & scale 1.687487).
## Hessian positive definite, eigenvalue range [1.383488e-05,565.533].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.0000 2.5435 0.99 0.38
## s(month) 8.0000 1.9371 0.93 <2e-16 ***
## s(AgeAtSampling) 9.0000 1.6652 0.95 0.03 *
## s(Seq_depth_nospikein) 9.0000 2.2796 1.00 0.49
## s(Year) 4.0000 3.1733 0.90 <2e-16 ***
## s(TotalAbundance) 9.0000 3.8136 1.04 0.91
## s(IndividID) 235.0000 0.0701 NA NA
## s(GroupAtSampling) 42.0000 7.3208 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 27
## [1] "Ruminococcaceae UCG-014"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.70017 0.08744 19.444 <2e-16 ***
## StorageFROZEN -0.09706 0.11767 -0.825 0.410
## Seq_runRUN2 0.05508 0.10678 0.516 0.606
## Seq_runRUN3 0.03141 0.10985 0.286 0.775
## Seq_runRUN4 -0.21265 0.17822 -1.193 0.233
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.000 1.000 0.115 0.734354
## s(month) 2.921 8.000 1.923 0.000805 ***
## s(AgeAtSampling) 3.580 4.372 5.099 0.000378 ***
## s(Seq_depth_nospikein) 1.000 1.000 7.753 0.005453 **
## s(Year) 2.668 3.175 3.925 0.007476 **
## s(TotalAbundance) 1.000 1.000 72.982 < 2e-16 ***
## s(IndividID) 14.559 234.000 0.069 0.190576
## s(GroupAtSampling) 8.078 41.000 0.434 0.005649 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.179 Deviance explained = 20.7%
## fREML = 1685.9 Scale est. = 1.073 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-5.410251e-06,3.327954e-06]
## (score 1685.9 & scale 1.073017).
## Hessian positive definite, eigenvalue range [2.140421e-06,565.6307].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00 1.00 1.00 0.525
## s(month) 8.00 2.92 0.94 0.020 *
## s(AgeAtSampling) 9.00 3.58 0.96 0.065 .
## s(Seq_depth_nospikein) 9.00 1.00 1.00 0.470
## s(Year) 4.00 2.67 0.96 0.095 .
## s(TotalAbundance) 9.00 1.00 1.01 0.695
## s(IndividID) 235.00 14.56 NA NA
## s(GroupAtSampling) 42.00 8.08 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 28
## [1] "Slackia"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.29955 0.04510 50.992 < 2e-16 ***
## StorageFROZEN -0.19307 0.06425 -3.005 0.00272 **
## Seq_runRUN2 0.13824 0.05753 2.403 0.01644 *
## Seq_runRUN3 0.14954 0.05895 2.537 0.01133 *
## Seq_runRUN4 0.30790 0.12722 2.420 0.01567 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 2.390 2.965 2.434 0.05295 .
## s(month) 2.595 8.000 0.896 0.03076 *
## s(AgeAtSampling) 1.000 1.001 0.000 0.99768
## s(Seq_depth_nospikein) 2.467 3.159 1.372 0.25825
## s(Year) 1.000 1.000 7.605 0.00592 **
## s(TotalAbundance) 4.901 5.847 91.653 < 2e-16 ***
## s(IndividID) 25.937 234.000 0.132 0.06426 .
## s(GroupAtSampling) 4.183 41.000 0.146 0.16861
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.501 Deviance explained = 52.2%
## fREML = 1068.1 Scale est. = 0.35619 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-2.397232e-06,1.436141e-05]
## (score 1068.089 & scale 0.3561865).
## Hessian positive definite, eigenvalue range [2.088478e-06,565.8175].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00 2.39 0.99 0.390
## s(month) 8.00 2.59 0.97 0.105
## s(AgeAtSampling) 9.00 1.00 1.00 0.425
## s(Seq_depth_nospikein) 9.00 2.47 0.96 0.085 .
## s(Year) 4.00 1.00 1.03 0.810
## s(TotalAbundance) 9.00 4.90 0.98 0.195
## s(IndividID) 235.00 25.94 NA NA
## s(GroupAtSampling) 42.00 4.18 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 29
## [1] "Turicibacter"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2240 0.1136 10.773 <2e-16 ***
## StorageFROZEN -0.1063 0.1392 -0.763 0.445
## Seq_runRUN2 0.1653 0.1291 1.280 0.201
## Seq_runRUN3 -0.1153 0.1306 -0.883 0.378
## Seq_runRUN4 -0.2564 0.2196 -1.167 0.243
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 3.668 4.475 5.624 0.000105 ***
## s(month) 3.532 8.000 3.478 6.78e-06 ***
## s(AgeAtSampling) 1.679 2.080 1.426 0.267365
## s(Seq_depth_nospikein) 1.000 1.000 0.062 0.804245
## s(Year) 2.841 3.326 3.640 0.013399 *
## s(TotalAbundance) 3.866 4.737 6.022 3.38e-05 ***
## s(IndividID) 25.221 234.000 0.133 0.047406 *
## s(GroupAtSampling) 14.213 41.000 0.938 0.000534 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.16 Deviance explained = 20.4%
## fREML = 1857.7 Scale est. = 1.4087 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 12 iterations.
## Gradient range [-2.282441e-06,5.13065e-06]
## (score 1857.657 & scale 1.408664).
## Hessian positive definite, eigenvalue range [2.285786e-06,565.8858].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00 3.67 0.99 0.330
## s(month) 8.00 3.53 0.80 <2e-16 ***
## s(AgeAtSampling) 9.00 1.68 0.94 0.005 **
## s(Seq_depth_nospikein) 9.00 1.00 1.04 0.915
## s(Year) 4.00 2.84 0.74 <2e-16 ***
## s(TotalAbundance) 9.00 3.87 1.04 0.910
## s(IndividID) 235.00 25.22 NA NA
## s(GroupAtSampling) 42.00 14.21 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 30
## [1] "Tyzzerella 4"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.54132 0.08350 18.458 <2e-16 ***
## StorageFROZEN -0.06704 0.10673 -0.628 0.530
## Seq_runRUN2 -0.02590 0.10159 -0.255 0.799
## Seq_runRUN3 0.06984 0.10559 0.661 0.508
## Seq_runRUN4 0.09654 0.17068 0.566 0.572
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.255e+00 1.463 0.071 0.935267
## s(month) 2.027e-05 8.000 0.000 0.842551
## s(AgeAtSampling) 3.199e+00 3.931 5.209 0.000537 ***
## s(Seq_depth_nospikein) 1.000e+00 1.000 3.949 0.047161 *
## s(Year) 1.292e+00 1.494 13.588 0.000249 ***
## s(TotalAbundance) 3.904e+00 4.785 57.959 < 2e-16 ***
## s(IndividID) 1.681e+01 234.000 0.082 0.144724
## s(GroupAtSampling) 1.070e+01 41.000 0.593 0.003595 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.389 Deviance explained = 41.2%
## fREML = 1630.2 Scale est. = 0.96918 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 15 iterations.
## Gradient range [-7.828685e-06,3.235263e-06]
## (score 1630.165 & scale 0.9691812).
## Hessian positive definite, eigenvalue range [2.140878e-06,565.6819].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00e+00 1.26e+00 0.98 0.23
## s(month) 8.00e+00 2.03e-05 0.93 <2e-16 ***
## s(AgeAtSampling) 9.00e+00 3.20e+00 1.01 0.57
## s(Seq_depth_nospikein) 9.00e+00 1.00e+00 1.02 0.76
## s(Year) 4.00e+00 1.29e+00 0.91 <2e-16 ***
## s(TotalAbundance) 9.00e+00 3.90e+00 0.97 0.17
## s(IndividID) 2.35e+02 1.68e+01 NA NA
## s(GroupAtSampling) 4.20e+01 1.07e+01 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] 31
## [1] "Weissella"
##
## Family: gaussian
## Link function: identity
##
## Formula:
## log10(Abundance2 + 1) ~ s(HoursAfterSunrise, bs = "cr") + s(month,
## bs = "cc") + s(AgeAtSampling, bs = "cr") + s(Seq_depth_nospikein,
## bs = "cr") + s(Year, bs = "cr", k = 5) + s(TotalAbundance,
## bs = "cr") + Storage + Seq_run + s(IndividID, bs = "re") +
## s(GroupAtSampling, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.01397 0.10333 9.813 <2e-16 ***
## StorageFROZEN -0.21030 0.12512 -1.681 0.0931 .
## Seq_runRUN2 -0.14249 0.11839 -1.204 0.2290
## Seq_runRUN3 -0.02069 0.12080 -0.171 0.8640
## Seq_runRUN4 0.18444 0.20279 0.909 0.3633
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(HoursAfterSunrise) 1.000 1.000 0.570 0.45031
## s(month) 5.232 8.000 6.727 < 2e-16 ***
## s(AgeAtSampling) 1.425 1.725 3.020 0.11953
## s(Seq_depth_nospikein) 1.097 1.186 0.155 0.83225
## s(Year) 1.000 1.000 1.568 0.21083
## s(TotalAbundance) 2.750 3.441 4.256 0.00387 **
## s(IndividID) 23.171 234.000 0.119 0.07317 .
## s(GroupAtSampling) 14.958 41.000 1.114 5.85e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.127 Deviance explained = 16.9%
## fREML = 1794.6 Scale est. = 1.2686 n = 1141
##
## Method: fREML Optimizer: perf newton
## full convergence after 13 iterations.
## Gradient range [-3.196111e-06,1.453356e-05]
## (score 1794.623 & scale 1.268577).
## Hessian positive definite, eigenvalue range [2.265557e-06,565.851].
## Model rank = 330 / 330
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(HoursAfterSunrise) 9.00 1.00 1.03 0.86
## s(month) 8.00 5.23 0.92 0.01 **
## s(AgeAtSampling) 9.00 1.42 0.94 0.02 *
## s(Seq_depth_nospikein) 9.00 1.10 1.02 0.74
## s(Year) 4.00 1.00 0.89 <2e-16 ***
## s(TotalAbundance) 9.00 2.75 1.03 0.82
## s(IndividID) 235.00 23.17 NA NA
## s(GroupAtSampling) 42.00 14.96 NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
names(year_estimates)<-taxanames
year_estimates_df<-data.frame(do.call(rbind, year_estimates))
This chunk took 1.95 minutes
year_estimates_df$Significant <-year_estimates_df$P_val<0.05
year_estimates_df <- year_estimates_df %>% dplyr::arrange(-effect_size)
year_estimates_unique<-dplyr::distinct(year_estimates_df, Taxa, .keep_all = T)
dim(year_estimates_unique)
## [1] 31 15
taxa_order<-as.character(year_estimates_unique$Taxa)
year_estimates_df$Taxa<-factor(year_estimates_df$Taxa, levels = taxa_order)
########## calculate effect size manually (difference between early and late)
year_estimates_df$Year2<-substr(year_estimates_df$Year, 1, 4)
calculate_effect_size<-subset(year_estimates_df, Year2 <2001 | Year2 > 2014)
calculate_effect_size<-calculate_effect_size[,c(3,9,10, 16)]
calculate_effect_size$Period <- ifelse(calculate_effect_size$Year2 <2001, "EARLY", "LATE")
table(calculate_effect_size$Period)
##
## EARLY LATE
## 1147 1147
calculate_effect_size<-calculate_effect_size %>% group_by(Taxa, Period) %>% summarize(Mean_est = mean(est_real))
calculate_effect_size_wide<- spread(calculate_effect_size, Period, Mean_est)
head(calculate_effect_size_wide)
calculate_effect_size_wide$Effect<- calculate_effect_size_wide$LATE - calculate_effect_size_wide$EARLY
calculate_effect_size_wide<-merge(calculate_effect_size_wide, year_estimates_df, by = "Taxa")
calculate_effect_size_wide<-distinct(calculate_effect_size_wide, Taxa, .keep_all = T)
calculate_effect_size_wide<-calculate_effect_size_wide %>%arrange(Effect)
taxa_order<-as.character(calculate_effect_size_wide$Taxa)
This chunk took 0 minutes
################### plot
################### plot
################### plot
# first add class column so can match colours with earlier figs
taxonomy<-data.frame(tax_table(genus_ps))
tax<-taxonomy[,c("Genus", "Class")]
tax<-distinct(tax, Genus, .keep_all = T)
calculate_effect_size_wide$Class <- vlookup(calculate_effect_size_wide$Taxa, tax, lookup_column = "Genus", result_column = "Class")
calculate_effect_size_wide$Class_plot<-ifelse(calculate_effect_size_wide$Class %in% top_class, calculate_effect_size_wide$Class, "Other")
calculate_effect_size_wide$Class_plot<-factor(calculate_effect_size_wide$Class_plot, levels = c("Bacteroidia","Fusobacteriia" , "Other","Gammaproteobacteria", "Clostridia",
"Coriobacteriia","Actinobacteria", "Bacilli") )
##
calculate_effect_size_wide$Sig <-ifelse(calculate_effect_size_wide$Significant==T, "yes", "no")
plot_tb_1<-ggplot(subset(calculate_effect_size_wide, Taxa != "uncultured"), aes(y = reorder(Taxa, Effect), x = Effect))+
theme_bw(base_size = 14)+
geom_bar_pattern(stat = "identity", aes(fill = Class_plot, pattern = Sig),
color = "black",
pattern_fill = "black",
pattern_angle = 45,
pattern_density = 0.1,
pattern_spacing = 0.025,
pattern_key_scale_factor = 0.6)+
scale_pattern_manual(values = c(no = "stripe", yes = "none"))+
scale_fill_manual(values = rev(pal))+
ylab("") +
theme(plot.margin=unit(c(1,0.2,0.2,0.2),"cm"))+
xlab("Effect size")+
labs(pattern = "Significant")+
labs(fill = "Bacterial class")
plot_tb_1
This chunk took 0.05 minutes
TB_estimates<-list()
Condition_estimates<-list()
Temp_estimates<-list()
Rainfall_estimates<-list()
taxanames<-unique(top_genera_melt3$Genus2)
for (i in 1:length(taxanames)){
tryCatch({ #catch errors
print(i)
taxa1<-subset(top_genera_melt3, Genus2 == taxanames[i])
# taxa1<-subset(top_genera_melt3, Genus2 == "Fusobacterium")
metadata<-taxa1
#scale variables
metadata$AgeAtSampling<-as.numeric(scale(log10(metadata$AgeAtSampling)))
metadata$TotalAbundance<-as.numeric(scale(log10(metadata$TotalAbundance)))
metadata$Rainfall<-as.numeric(scale(log10(metadata$sum_rainfall_month+1)))
metadata$Seq_depth_nospikein<-as.numeric(scale(log10(metadata$Seq_depth_nospikein)))
metadata$TB_stage<-factor(metadata$TB_stage, levels = c("Unexposed", "Exposed","Diseased"))
metadata$Year<-factor(metadata$Year)
# fit gamm
m_taxa <- mgcv::gam(log10(Abundance2+1)~
s(HoursAfterSunrise, bs = "cr")+
s(month, bs = "cc")+
s(AgeAtSampling, bs = "cr")+
s(TotalAbundance, bs = "cr")+
s(Seq_depth_nospikein, bs = "cr")+
TB_stage+
Condition_weight+
Temp_max+
Rainfall+
Season+
Storage+
Seq_run+
s(IndividID, bs = "re")+
s(Year, bs = "re")+
s(GroupAtSampling, bs = "re"),
#correlation = corARMA(form = ~ 1|Year, p = 3),
data=metadata,
family = gaussian)
summary<-summary(m_taxa)
## TB estimates
TB_estimates_df<-data.frame(summary$p.table)[1:3,]
TB_estimates_df$Level<-c("Unexposed", "Exposed", "Diseased")
names(TB_estimates_df)[4]<-"P_val"
TB_estimates_df$Estimate2<-ifelse(TB_estimates_df$Level == "Unexposed", 0, TB_estimates_df$Estimate)
TB_estimates_df$lower<-NA
TB_estimates_df$upper<-NA
# confidence intervals exposed
beta <- coef(m_taxa)
Vb <- vcov(m_taxa, unconditional = TRUE)
se <- sqrt(diag(Vb))
j <- which(names(beta) == "(Intercept)")
cis<- beta[j] + (c(-1,1) * (2 * se[j]))
TB_estimates_df[1,7]<-cis[1]
TB_estimates_df[1,8]<-cis[2]
# confidence intervals exposed
j <- which(names(beta) == "TB_stageExposed")
cis<- beta[j] + (c(-1,1) * (2 * se[j]))
TB_estimates_df[2,7]<-cis[1]
TB_estimates_df[2,8]<-cis[2]
# confidence intervals symptomatic
j <- which(names(beta) == "TB_stageDiseased")
cis<- beta[j] + (c(-1,1) * (2 * se[j]))
TB_estimates_df[3,7]<-cis[1]
TB_estimates_df[3,8]<-cis[2]
## add coefficient
TB_estimates_df$Coef <-as.numeric(coef(m_taxa)[1:3])
TB_estimates_df$Taxa <-taxanames[i]
TB_estimates[[i]]<-TB_estimates_df
## extract condition stats
condition_estimates<- data.frame(summary$p.table)[4,]
condition_estimates$Taxa<-taxanames[i]
Condition_estimates[[i]]<-condition_estimates
## extract max temp stats
temp_estimates<- data.frame(summary$p.table)[5,]
temp_estimates$Taxa<-taxanames[i]
Temp_estimates[[i]]<-temp_estimates
## extract rainfall stats
rainfall_estimates<- data.frame(summary$p.table)[6,]
rainfall_estimates$Taxa<-taxanames[i]
Rainfall_estimates[[i]]<-rainfall_estimates
}, error=function(e){})
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
## [1] 28
## [1] 29
## [1] 30
## [1] 31
This chunk took 22.03 minutes
names(TB_estimates)<-taxanames
TB_estimates_df<-do.call(rbind, TB_estimates)
TB_estimates_df$Significant <-TB_estimates_df$P_val < 0.05
# edit significance (intercept will always be significant)
sig_taxa<-subset(TB_estimates_df, Significant == T & Level !="Unexposed")
sig_taxa<-unique(sig_taxa$Taxa)
TB_estimates_df$Significant<- TB_estimates_df$Taxa %in% sig_taxa
TB_estimates_df
## edit confidence intervals
TB_estimates_df$Level<-factor(TB_estimates_df$Level, levels = c("Diseased", "Exposed", "Unexposed"))
TB_estimates_df$lower<- ifelse(TB_estimates_df$Level == "Unexposed", TB_estimates_df$lower-TB_estimates_df$Estimate, TB_estimates_df$lower)
TB_estimates_df$upper<- ifelse(TB_estimates_df$Level == "Unexposed", TB_estimates_df$upper-TB_estimates_df$Estimate, TB_estimates_df$upper)
# order taxa
TB_estimates_df$Taxa<-factor(TB_estimates_df$Taxa, levels = taxa_order)
#############
#############
#############
plot_tb_2<-ggplot(subset(TB_estimates_df, Taxa!= "uncultured"), aes( x = Estimate2, y = Taxa, group = Level))+
theme_bw(base_size = 14)+
geom_vline(xintercept = 0, linetype = "dashed")+
geom_errorbarh(aes(col = Level, xmin = Estimate2- Std..Error, xmax = Estimate2+ Std..Error), height = 0, size = 1, position=position_dodge(width=0.5))+
geom_errorbarh(aes(alpha = Significant, xmin = Estimate2- Std..Error, xmax = Estimate2+ Std..Error), height = 0, size = 1, col = "grey", position=position_dodge(width=0.5))+
scale_alpha_discrete(range = c(1,0), guide = "none")+
geom_point(aes(col = Level), size = 2, position=position_dodge(width=0.5))+
scale_color_manual(values = c( "brown2", "skyblue","forestgreen"), guide = guide_legend(reverse = TRUE))+
xlim(c(-0.9,NA))+
theme(axis.text.y = element_blank(), axis.title.y = element_blank())+
xlab("Effect size")+
labs(col = "TB stage")+
theme(plot.margin=unit(c(1,0.2,0.2,0.2),"cm"))+
# legend
theme( legend.background = element_blank(),
legend.box.background = element_rect(colour = "grey", fill = "white"))+
theme(legend.position=c(0.2,0.95), legend.direction="vertical",
legend.key.height = unit(0.05, 'cm'),
legend.key.width = unit(0.02, 'cm'),
legend.title=element_text(size=8),
legend.text =element_text(size=8))+
guides(colour=guide_legend(title.position="top", title.hjust =0.5,reverse = TRUE))+
coord_cartesian(xlim = c(-0.7, 0.7)) +
scale_x_continuous(breaks = c(-0.3, 0, 0.3))+
theme(legend.margin=margin(t = c(0.1,0.1,0.1,0.1), unit='cm'))
plot_tb_2
This chunk took 0.05 minutes
names(Condition_estimates)<-taxanames
condition_estimates_df<-do.call(rbind, Condition_estimates)
condition_estimates_df$Significant <-condition_estimates_df$Pr...t.. < 0.05
condition_estimates_df$Taxa<-factor(condition_estimates_df$Taxa, levels = taxa_order)
plot_condition<-
ggplot(subset(condition_estimates_df, Taxa!= "uncultured"), aes( x = Estimate, y = Taxa))+
theme_bw(base_size = 14)+
geom_point( aes(col = Significant), size = 3)+
geom_errorbarh(aes(col = Significant, xmin = ((Estimate- Std..Error)-0.0005), xmax = ((Estimate+ Std..Error)+0.0005) ), height = 0, size = 1)+
geom_vline(xintercept = 0, linetype = "dashed")+
scale_color_manual(values = c("grey", "blue"))+
theme(axis.text.y = element_blank(), axis.title.y = element_blank())+
xlab("Effect size")+
xlim(c(-0.004,NA))+
#legend
theme( legend.background = element_blank(),
legend.box.background = element_rect(colour = "grey", fill = "white"))+
theme(legend.position=c(0.8,0.95), legend.direction="vertical",
legend.key.height = unit(0.02, 'cm'),
legend.key.width = unit(0.02, 'cm'),
legend.title=element_text(size=8),
legend.text =element_text(size=6))+
guides(colour=guide_legend(title.position="top", reverse = T,
title.hjust =0.5))+
coord_cartesian(xlim = c(-0.01, 0.01))+
scale_x_continuous(breaks = c(-0.005, 0, 0.005))
plot_condition
This chunk took 0.02 minutes
names(Temp_estimates)<-taxanames
temp_estimates_df<-do.call(rbind, Temp_estimates)
temp_estimates_df$Significant <-temp_estimates_df$Pr...t.. < 0.05
temp_estimates_df$Taxa<-factor(temp_estimates_df$Taxa, levels = taxa_order)
plot_temp<-
ggplot(subset(temp_estimates_df, Taxa!= "uncultured"), aes( x = Estimate, y = Taxa))+
theme_bw(base_size = 14)+
geom_point( aes(col = Significant), size = 3)+
geom_errorbarh(aes(col = Significant, xmin = Estimate- Std..Error, xmax = Estimate+ Std..Error), height = 0, size = 1)+
geom_vline(xintercept = 0, linetype = "dashed")+
scale_color_manual(values = c("grey", "blue"))+
theme(axis.text.y = element_blank(), axis.title.y = element_blank())+
xlab("Effect size")+
# xlim(c(-0.004,NA))+
#legend
theme( legend.background = element_blank(),
legend.box.background = element_rect(colour = "grey", fill = "white"))+
theme(legend.position=c(0.8,0.95), legend.direction="vertical",
legend.key.height = unit(0.02, 'cm'),
legend.key.width = unit(0.02, 'cm'),
legend.title=element_text(size=8),
legend.text =element_text(size=6))+
guides(colour=guide_legend(title.position="top", reverse = T,
title.hjust =0.5))+
coord_cartesian(xlim = c(-0.1, 0.1))+
scale_x_continuous(breaks = c(-0.05, 0, 0.05))
plot_temp
This chunk took 0.02 minutes
names(Rainfall_estimates)<-taxanames
rainfall_estimates_df<-do.call(rbind, Rainfall_estimates)
rainfall_estimates_df$Significant <-rainfall_estimates_df$Pr...t.. < 0.05
rainfall_estimates_df$Taxa<-factor(rainfall_estimates_df$Taxa, levels = taxa_order)
plot_rainfall<-
ggplot(subset(rainfall_estimates_df, Taxa!= "uncultured"), aes( x = Estimate, y = Taxa))+
theme_bw(base_size = 14)+
geom_point( aes(col = Significant), size = 3)+
geom_errorbarh(aes(col = Significant, xmin = Estimate- Std..Error, xmax = Estimate+ Std..Error), height = 0, size = 1)+
geom_vline(xintercept = 0, linetype = "dashed")+
scale_color_manual(values = c("grey", "blue"))+
theme(axis.text.y = element_blank(), axis.title.y = element_blank())+
xlab("Effect size")+
# xlim(c(-0.004,NA))+
#legend
theme( legend.background = element_blank(),
legend.box.background = element_rect(colour = "grey", fill = "white"))+
theme(legend.position=c(0.8,0.95), legend.direction="vertical",
legend.key.height = unit(0.02, 'cm'),
legend.key.width = unit(0.02, 'cm'),
legend.title=element_text(size=8),
legend.text =element_text(size=6))+
guides(colour=guide_legend(title.position="top", reverse = T,
title.hjust =0.5))+
coord_cartesian(xlim = c(-0.5, 0.5))+
scale_x_continuous(breaks = c(-0.3, 0, 0.3))
plot_rainfall
This chunk took 0.01 minutes
# Combining plots
rm_legend <- function(p){p + theme(legend.position = "none")}
ggarrange(rm_legend(plot_tb_1), plot_tb_2, plot_rainfall, plot_temp, plot_condition, nrow = 1, align = "h", widths = c(2.2,1,1,1, 1), labels = c("a) Year", "b) TB", "c) Rainfall","d) Max. temp.", "e) Body condition"), label.x = c(0.45,-0.05,-0.15,-0.25, -0.32))
#ggsave("Figures/Fig4.pdf")
This chunk took 0.21 minutes
*Need to do CLR analysis from here as this accounts for bacterial load
#### extract site x species array
meerkat_filtered_genus<-tax_glom(meerkat_filtered, taxrank = "Genus")
genus_core<-prune_taxa(top_genera, meerkat_filtered_genus)
taxtable<-data.frame(tax_table(genus_core))
taxa_names(genus_core)<-taxtable$Genus
phylo<-genus_core
taxtable<-data.frame(tax_table(phylo))
taxa_names(phylo)<-taxtable$Genus
phylo_array<-data.frame(t(phylo@otu_table@.Data))
#phylo_array[1:5,1:5]
names(phylo_array)<-taxa_names(phylo)
dim(phylo_array)
## [1] 1141 30
##########################
#### genreate association matrix from the 'real' data
# this uses the package NetCoMi
net_real <- netConstruct(phylo_array, verbose = 0,
measure = "spearman",
normMethod = "clr",
zeroMethod = "none",
sparsMethod = "threshold",
thresh = 0.3)
props_single <- netAnalyze(net_real,
centrLCC = TRUE,
clustMethod = "cluster_fast_greedy",
hubPar = "eigenvector",
weightDeg = FALSE,
normDeg = FALSE)
plot(props_single,
nodeColor = "cluster",
layout = "spring",
repulsion = 1,
# nodeSize = "eigenvector",
title1 = F,
#showTitle = TRUE,
cexTitle = 2.3,
shortenLabels = "simple",
labelScale = F,
nodeSize = "fix",
colorVec = c("orchid", "skyblue", "orange", "lightgreen", "pink"),
nodeTransp = 0,
borderCol = "white",
borderWidth = 3,
highlightHubs = F ,
edgeTranspLow = 0)
legend(0.7, 1.1, cex = 1, title = "Direction",
legend = c("+","-"), lty = 1, lwd = 1, col = c("#009900","red"),
bty = "n", horiz = F)
cluster_memb<-data.frame(props_single$clustering$clust1)
head(cluster_memb, 20)
names(cluster_memb)<-"Cluster"
# make singletons their own cluster (not zero)
cluster_memb$Cluster<-ifelse(row.names(cluster_memb)=="Escherichia-Shigella", 6,cluster_memb$Cluster )
cluster_memb$Cluster<-ifelse(row.names(cluster_memb)=="Peptoclostridium", 7,cluster_memb$Cluster )
cluster_memb$Cluster<-ifelse(row.names(cluster_memb)=="Ruminococcaceae UCG-014", 8,cluster_memb$Cluster )
cluster_memb$Cluster2<-case_when(cluster_memb == 1 ~ "Lactococcus cluster",
cluster_memb == 2 ~ "Bacteroides cluster",
cluster_memb == 3 ~ "Blautia cluster",
cluster_memb == 4 ~ "Clostridium cluster",
cluster_memb == 5 ~ "Bacillus cluster",
cluster_memb == 6 ~ "Escherichia-Shigella",
cluster_memb == 7 ~ "Peptoclostridium",
cluster_memb == 8 ~ "Ruminococcus UCG-014" )
This chunk took 0.25 minutes
meerkat_filtered_genus<-tax_glom(meerkat_filtered, taxrank = "Genus")
genus_core<-prune_taxa(top_genera, meerkat_filtered_genus)
taxtable<-data.frame(tax_table(genus_core))
taxa_names(genus_core)<-taxtable$Genus
### 1) Genus level
### 1) Genus level
### 1) Genus level
### 1) Genus level
survival_df<-data.frame(sample_data(genus_core))
head(survival_df)
# calculate if ID is alive 6 months later, TRUE = dead
survival_df$Time_180<-survival_df$SampleDate+180
survival_df$Survival_180<- ifelse(survival_df$Time_180 < survival_df$DeathDate, FALSE,TRUE)
table(survival_df$Survival_180)
##
## FALSE TRUE
## 811 330
survival_df$FollowUp_180<-180
## add on genus level abundance
genus_clr<-microbiome::transform(genus_core, transform = "clr")
otutable<-data.frame(t(otu_table(genus_clr)))
otutable[1:5, 1:5]
names(otutable)<-taxa_names(genus_clr)
names(otutable)<-make.names(names(otutable),unique = TRUE)
otutable<-data.frame(scale(otutable))
#hist(otutable$Helicobacter)
survival_df_genus<-merge(survival_df, otutable, by = 0)
head(survival_df_genus)
# model
names(survival_df_genus)
## [1] "Row.names" "feature.id"
## [3] "IndividID" "SampleDate"
## [5] "SampleTime" "Storage"
## [7] "Seq_run" "Imtechella"
## [9] "Allobacillus" "Seq_depth"
## [11] "scaling_factor" "Seq_depth_nospikein"
## [13] "BirthDate" "DeathDate"
## [15] "AgeAtSampling" "GroupAtSampling"
## [17] "Condition_weight" "SocialStatus"
## [19] "Temp_max" "sum_rainfall_month"
## [21] "HoursAfterSunrise" "TimeCat"
## [23] "Year" "month"
## [25] "Season" "TB_status"
## [27] "TB_exposure" "TB_resistance"
## [29] "TB_survival" "TB_symptom_date"
## [31] "TB_exposure_date" "TB_stage"
## [33] "TotalAbundance" "Time_180"
## [35] "Survival_180" "FollowUp_180"
## [37] "Helicobacter" "Geodermatophilus"
## [39] "Fusobacterium" "Tyzzerella.4"
## [41] "X.Ruminococcus..torques.group" "Blautia"
## [43] "Marvinbryantia" "Lachnoclostridium"
## [45] "Lachnospiraceae.NK4A136.group" "Rikenellaceae.RC9.gut.group"
## [47] "Bacteroides" "Alloprevotella"
## [49] "Parabacteroides" "Paeniclostridium"
## [51] "Romboutsia" "Peptoclostridium"
## [53] "Collinsella" "Enterococcus"
## [55] "Pediococcus" "Weissella"
## [57] "Bacillus" "Turicibacter"
## [59] "Catenisphaera" "Lactococcus"
## [61] "Clostridium.sensu.stricto.1" "Peptococcus"
## [63] "Phascolarctobacterium" "Ruminococcaceae.UCG.014"
## [65] "Slackia" "Escherichia.Shigella"
#survival_df_genus<-survival_df_genus[,c(2, 30, 31,8, 13,20,27,19,18,32:60)]
names(survival_df_genus)
## [1] "Row.names" "feature.id"
## [3] "IndividID" "SampleDate"
## [5] "SampleTime" "Storage"
## [7] "Seq_run" "Imtechella"
## [9] "Allobacillus" "Seq_depth"
## [11] "scaling_factor" "Seq_depth_nospikein"
## [13] "BirthDate" "DeathDate"
## [15] "AgeAtSampling" "GroupAtSampling"
## [17] "Condition_weight" "SocialStatus"
## [19] "Temp_max" "sum_rainfall_month"
## [21] "HoursAfterSunrise" "TimeCat"
## [23] "Year" "month"
## [25] "Season" "TB_status"
## [27] "TB_exposure" "TB_resistance"
## [29] "TB_survival" "TB_symptom_date"
## [31] "TB_exposure_date" "TB_stage"
## [33] "TotalAbundance" "Time_180"
## [35] "Survival_180" "FollowUp_180"
## [37] "Helicobacter" "Geodermatophilus"
## [39] "Fusobacterium" "Tyzzerella.4"
## [41] "X.Ruminococcus..torques.group" "Blautia"
## [43] "Marvinbryantia" "Lachnoclostridium"
## [45] "Lachnospiraceae.NK4A136.group" "Rikenellaceae.RC9.gut.group"
## [47] "Bacteroides" "Alloprevotella"
## [49] "Parabacteroides" "Paeniclostridium"
## [51] "Romboutsia" "Peptoclostridium"
## [53] "Collinsella" "Enterococcus"
## [55] "Pediococcus" "Weissella"
## [57] "Bacillus" "Turicibacter"
## [59] "Catenisphaera" "Lactococcus"
## [61] "Clostridium.sensu.stricto.1" "Peptococcus"
## [63] "Phascolarctobacterium" "Ruminococcaceae.UCG.014"
## [65] "Slackia" "Escherichia.Shigella"
survival_df_genus$AgeAtSampling<-scale(log10(survival_df_genus$AgeAtSampling))
survival_df_genus$Condition_weight<-scale(survival_df_genus$Condition_weight)
survival_df_genus$IndividID<-as.character(survival_df_genus$IndividID)
survival_df_genus$Season <-case_when(survival_df_genus$Season == "WET" ~ "Wet",
survival_df_genus$Season == "DRY" ~ "Dry")
names(survival_df_genus)[15]<-"Age"
survival_df_genus<-subset(survival_df_genus, !is.na(Survival_180))
dim(survival_df_genus)
## [1] 1141 66
#survival_df_genus$Survival<-Surv(survival_df_genus$FollowUp_180, survival_df_genus$Survival_180)
frmla <- as.formula(paste("Surv(FollowUp_180, Survival_180)",
paste(names(survival_df_genus)[5:38], sep = "",
collapse = " + "), sep = " ~ "))
frmla <-update.formula(frmla, ~ . + (1|IndividID))
fit_genus <- coxme::coxme(Surv(FollowUp_180, Survival_180) ~
Age +
Season +
TB_stage +
SocialStatus +
Condition_weight +
Fusobacterium+
Bacteroides+
Parabacteroides+
Phascolarctobacterium+
Helicobacter +
Alloprevotella+
Enterococcus +
Pediococcus +
Lactococcus+
Peptoclostridium+
Collinsella+
Slackia+
(1 | IndividID),
data = survival_df_genus)
fit_genus
## Cox mixed-effects model fit by maximum likelihood
## Data: survival_df_genus
## events, n = 330, 1141
## Iterations= 24 150
## NULL Integrated Fitted
## Log-likelihood -2270.128 -2065.913 -1911.819
##
## Chisq df p AIC BIC
## Integrated loglik 408.43 19.00 0 370.43 298.25
## Penalized loglik 716.62 133.39 0 449.84 -56.92
##
## Model: Surv(FollowUp_180, Survival_180) ~ Age + Season + TB_stage + SocialStatus + Condition_weight + Fusobacterium + Bacteroides + Parabacteroides + Phascolarctobacterium + Helicobacter + Alloprevotella + Enterococcus + Pediococcus + Lactococcus + Peptoclostridium + Collinsella + Slackia + (1 | IndividID)
## Fixed coefficients
## coef exp(coef) se(coef) z p
## Age 1.578270076 4.8465644 0.12349691 12.78 0.0e+00
## SeasonWet -0.345947098 0.7075499 0.13207267 -2.62 8.8e-03
## TB_stageExposed 1.468404417 4.3423011 0.21576219 6.81 1.0e-11
## TB_stageDiseased 2.253788964 9.5237527 0.26572017 8.48 0.0e+00
## SocialStatusSubordinate 0.716211386 2.0466645 0.21278473 3.37 7.6e-04
## Condition_weight -0.280096365 0.7557109 0.06493959 -4.31 1.6e-05
## Fusobacterium 0.079388592 1.0826249 0.08837575 0.90 3.7e-01
## Bacteroides -0.089877390 0.9140432 0.10342463 -0.87 3.8e-01
## Parabacteroides 0.019271270 1.0194582 0.09393135 0.21 8.4e-01
## Phascolarctobacterium -0.128020564 0.8798353 0.09877150 -1.30 1.9e-01
## Helicobacter -0.052896360 0.9484783 0.07958094 -0.66 5.1e-01
## Alloprevotella 0.082769014 1.0862909 0.07785428 1.06 2.9e-01
## Enterococcus -0.090187680 0.9137597 0.07411484 -1.22 2.2e-01
## Pediococcus -0.102416622 0.9026534 0.07462879 -1.37 1.7e-01
## Lactococcus -0.092793469 0.9113817 0.07505914 -1.24 2.2e-01
## Peptoclostridium -0.007256337 0.9927699 0.07105360 -0.10 9.2e-01
## Collinsella 0.112057996 1.1185777 0.08414084 1.33 1.8e-01
## Slackia -0.092482014 0.9116656 0.08569759 -1.08 2.8e-01
##
## Random effects
## Group Variable Std Dev Variance
## IndividID Intercept 1.045863 1.093830
sjPlot::plot_model(fit_genus, axis.lim = c(0.8,20), vline.color = "darkgrey", sort.est = T)+ theme_bw(base_size = 16)+ggtitle("")+ylab("Hazard ratio")
#options(na.action = "na.fail")
#dredge_genus_df<- MuMIn::dredge(fit_genus, fixed = c("Age", "Season", "TB_stage",
# "SocialStatus", "Condition_weight"))
#write.csv(dredge_genus_df, "dredge_results_genus.csv")
######### 2) Network approach ############
######### 2) Network approach ############
######### 2) Network approach ############
######### 2) Network approach ############
pseq_network<-genus_core
head(cluster_memb)
tail(cluster_memb)
pseq_network@tax_table@.Data[,1]<-cluster_memb$Cluster2
pseq_network@tax_table@.Data
## Kingdom Phylum
## Helicobacter "Bacteroides cluster" "Epsilonbacteraeota"
## Geodermatophilus "Bacillus cluster" "Actinobacteria"
## Fusobacterium "Bacteroides cluster" "Fusobacteria"
## Tyzzerella 4 "Bacteroides cluster" "Firmicutes"
## [Ruminococcus] torques group "Blautia cluster" "Firmicutes"
## Blautia "Blautia cluster" "Firmicutes"
## Marvinbryantia "Blautia cluster" "Firmicutes"
## Lachnoclostridium "Blautia cluster" "Firmicutes"
## Lachnospiraceae NK4A136 group "Bacteroides cluster" "Firmicutes"
## Rikenellaceae RC9 gut group "Bacteroides cluster" "Bacteroidetes"
## Bacteroides "Bacteroides cluster" "Bacteroidetes"
## Alloprevotella "Bacteroides cluster" "Bacteroidetes"
## Parabacteroides "Bacteroides cluster" "Bacteroidetes"
## Paeniclostridium "Clostridium cluster" "Firmicutes"
## Romboutsia "Clostridium cluster" "Firmicutes"
## Peptoclostridium "Peptoclostridium" "Firmicutes"
## Collinsella "Blautia cluster" "Actinobacteria"
## Enterococcus "Lactococcus cluster" "Firmicutes"
## Pediococcus "Lactococcus cluster" "Firmicutes"
## Weissella "Lactococcus cluster" "Firmicutes"
## Bacillus "Bacillus cluster" "Firmicutes"
## Turicibacter "Bacillus cluster" "Firmicutes"
## Catenisphaera "Blautia cluster" "Firmicutes"
## Lactococcus "Lactococcus cluster" "Firmicutes"
## Clostridium sensu stricto 1 "Clostridium cluster" "Firmicutes"
## Peptococcus "Blautia cluster" "Firmicutes"
## Phascolarctobacterium "Bacteroides cluster" "Firmicutes"
## Ruminococcaceae UCG-014 "Ruminococcus UCG-014" "Firmicutes"
## Slackia "Blautia cluster" "Actinobacteria"
## Escherichia-Shigella "Escherichia-Shigella" "Proteobacteria"
## Class Order
## Helicobacter "Campylobacteria" "Campylobacterales"
## Geodermatophilus "Actinobacteria" "Frankiales"
## Fusobacterium "Fusobacteriia" "Fusobacteriales"
## Tyzzerella 4 "Clostridia" "Clostridiales"
## [Ruminococcus] torques group "Clostridia" "Clostridiales"
## Blautia "Clostridia" "Clostridiales"
## Marvinbryantia "Clostridia" "Clostridiales"
## Lachnoclostridium "Clostridia" "Clostridiales"
## Lachnospiraceae NK4A136 group "Clostridia" "Clostridiales"
## Rikenellaceae RC9 gut group "Bacteroidia" "Bacteroidales"
## Bacteroides "Bacteroidia" "Bacteroidales"
## Alloprevotella "Bacteroidia" "Bacteroidales"
## Parabacteroides "Bacteroidia" "Bacteroidales"
## Paeniclostridium "Clostridia" "Clostridiales"
## Romboutsia "Clostridia" "Clostridiales"
## Peptoclostridium "Clostridia" "Clostridiales"
## Collinsella "Coriobacteriia" "Coriobacteriales"
## Enterococcus "Bacilli" "Lactobacillales"
## Pediococcus "Bacilli" "Lactobacillales"
## Weissella "Bacilli" "Lactobacillales"
## Bacillus "Bacilli" "Bacillales"
## Turicibacter "Erysipelotrichia" "Erysipelotrichales"
## Catenisphaera "Erysipelotrichia" "Erysipelotrichales"
## Lactococcus "Bacilli" "Lactobacillales"
## Clostridium sensu stricto 1 "Clostridia" "Clostridiales"
## Peptococcus "Clostridia" "Clostridiales"
## Phascolarctobacterium "Negativicutes" "Selenomonadales"
## Ruminococcaceae UCG-014 "Clostridia" "Clostridiales"
## Slackia "Coriobacteriia" "Coriobacteriales"
## Escherichia-Shigella "Gammaproteobacteria" "Enterobacteriales"
## Family
## Helicobacter "Helicobacteraceae"
## Geodermatophilus "Geodermatophilaceae"
## Fusobacterium "Fusobacteriaceae"
## Tyzzerella 4 "Lachnospiraceae"
## [Ruminococcus] torques group "Lachnospiraceae"
## Blautia "Lachnospiraceae"
## Marvinbryantia "Lachnospiraceae"
## Lachnoclostridium "Lachnospiraceae"
## Lachnospiraceae NK4A136 group "Lachnospiraceae"
## Rikenellaceae RC9 gut group "Rikenellaceae"
## Bacteroides "Bacteroidaceae"
## Alloprevotella "Prevotellaceae"
## Parabacteroides "Tannerellaceae"
## Paeniclostridium "Peptostreptococcaceae"
## Romboutsia "Peptostreptococcaceae"
## Peptoclostridium "Peptostreptococcaceae"
## Collinsella "Coriobacteriaceae"
## Enterococcus "Enterococcaceae"
## Pediococcus "Lactobacillaceae"
## Weissella "Leuconostocaceae"
## Bacillus "Bacillaceae"
## Turicibacter "Erysipelotrichaceae"
## Catenisphaera "Erysipelotrichaceae"
## Lactococcus "Streptococcaceae"
## Clostridium sensu stricto 1 "Clostridiaceae 1"
## Peptococcus "Peptococcaceae"
## Phascolarctobacterium "Acidaminococcaceae"
## Ruminococcaceae UCG-014 "Ruminococcaceae"
## Slackia "Eggerthellaceae"
## Escherichia-Shigella "Enterobacteriaceae"
## Genus Species
## Helicobacter "Helicobacter" NA
## Geodermatophilus "Geodermatophilus" NA
## Fusobacterium "Fusobacterium" NA
## Tyzzerella 4 "Tyzzerella 4" NA
## [Ruminococcus] torques group "[Ruminococcus] torques group" NA
## Blautia "Blautia" NA
## Marvinbryantia "Marvinbryantia" NA
## Lachnoclostridium "Lachnoclostridium" NA
## Lachnospiraceae NK4A136 group "Lachnospiraceae NK4A136 group" NA
## Rikenellaceae RC9 gut group "Rikenellaceae RC9 gut group" NA
## Bacteroides "Bacteroides" NA
## Alloprevotella "Alloprevotella" NA
## Parabacteroides "Parabacteroides" NA
## Paeniclostridium "Paeniclostridium" NA
## Romboutsia "Romboutsia" NA
## Peptoclostridium "Peptoclostridium" NA
## Collinsella "Collinsella" NA
## Enterococcus "Enterococcus" NA
## Pediococcus "Pediococcus" NA
## Weissella "Weissella" NA
## Bacillus "Bacillus" NA
## Turicibacter "Turicibacter" NA
## Catenisphaera "Catenisphaera" NA
## Lactococcus "Lactococcus" NA
## Clostridium sensu stricto 1 "Clostridium sensu stricto 1" NA
## Peptococcus "Peptococcus" NA
## Phascolarctobacterium "Phascolarctobacterium" NA
## Ruminococcaceae UCG-014 "Ruminococcaceae UCG-014" NA
## Slackia "Slackia" NA
## Escherichia-Shigella "Escherichia-Shigella" NA
pseq_network2<-tax_glom(pseq_network, taxrank = "Kingdom")
pseq_network2<-microbiome::transform(pseq_network2, "clr")
taxa_names(pseq_network2)
## [1] "Blautia" "Bacteroides"
## [3] "Peptoclostridium" "Enterococcus"
## [5] "Bacillus" "Clostridium sensu stricto 1"
## [7] "Ruminococcaceae UCG-014" "Escherichia-Shigella"
taxa_names(pseq_network2)<-c("Blautia_cluster", "Bacteroides_cluster", "Peptoclostridium", "Lactococcus_cluster", "Bacillus_cluster", "Clostridium_cluster", "Ruminococcus_UCG-014", "Escherichia-Shigella")
otutable<-data.frame(t(otu_table(pseq_network2)))
otutable[1:5, 1:5]
otutable<-data.frame(scale(otutable))
#hist(otutable$Helicobacter)
survival_df_network<-merge(survival_df, otutable, by = 0)
#############
#############
#############
#############
names(survival_df_network)
## [1] "Row.names" "feature.id" "IndividID"
## [4] "SampleDate" "SampleTime" "Storage"
## [7] "Seq_run" "Imtechella" "Allobacillus"
## [10] "Seq_depth" "scaling_factor" "Seq_depth_nospikein"
## [13] "BirthDate" "DeathDate" "AgeAtSampling"
## [16] "GroupAtSampling" "Condition_weight" "SocialStatus"
## [19] "Temp_max" "sum_rainfall_month" "HoursAfterSunrise"
## [22] "TimeCat" "Year" "month"
## [25] "Season" "TB_status" "TB_exposure"
## [28] "TB_resistance" "TB_survival" "TB_symptom_date"
## [31] "TB_exposure_date" "TB_stage" "TotalAbundance"
## [34] "Time_180" "Survival_180" "FollowUp_180"
## [37] "Blautia_cluster" "Bacteroides_cluster" "Peptoclostridium"
## [40] "Lactococcus_cluster" "Bacillus_cluster" "Clostridium_cluster"
## [43] "Ruminococcus_UCG.014" "Escherichia.Shigella"
#survival_df_network<-survival_df_network[,c(2, 8,30, 31, 13,20,27,19,18,32:38)]
#names(survival_df_network)
survival_df_network$AgeAtSampling<-scale(log10(survival_df_network$AgeAtSampling))
survival_df_network$Condition_weight<-scale(survival_df_network$Condition_weight)
survival_df_network$IndividID<-as.character(survival_df_network$IndividID)
survival_df_network$Season <-case_when(survival_df_network$Season == "WET" ~ "Wet",
survival_df_network$Season == "DRY" ~ "Dry")
names(survival_df_network)[15]<-"Age"
survival_df_network<-subset(survival_df_network, !is.na(Survival_180))
dim(survival_df_network)
## [1] 1141 44
#survival_df_network$Survival<-Surv(survival_df_network$FollowUp_180, survival_df_network$Survival_180)
frmla <- as.formula(paste("Surv(FollowUp_180, Survival_180)",
paste(names(survival_df_network)[5:16], sep = "",
collapse = " + "), sep = " ~ "))
frmla <-update.formula(frmla, ~ . + (1|IndividID))
fit_network <- coxme::coxme(Surv(FollowUp_180, Survival_180) ~ Age +
Season +
TB_stage +
SocialStatus +
Condition_weight +
Blautia_cluster +
Bacteroides_cluster +
Peptoclostridium +
Lactococcus_cluster +
Bacillus_cluster +
Clostridium_cluster +
Ruminococcus_UCG.014 +
(1 | IndividID),
data = survival_df_network)
summary(fit_network)
## Cox mixed-effects model fit by maximum likelihood
## Data: survival_df_network
## events, n = 330, 1141
## Iterations= 23 144
## NULL Integrated Fitted
## Log-likelihood -2270.128 -2068.655 -1911.604
##
## Chisq df p AIC BIC
## Integrated loglik 402.95 14.0 0 374.95 321.76
## Penalized loglik 717.05 130.5 0 456.04 -39.75
##
## Model: Surv(FollowUp_180, Survival_180) ~ Age + Season + TB_stage + SocialStatus + Condition_weight + Blautia_cluster + Bacteroides_cluster + Peptoclostridium + Lactococcus_cluster + Bacillus_cluster + Clostridium_cluster + Ruminococcus_UCG.014 + (1 | IndividID)
## Fixed coefficients
## coef exp(coef) se(coef) z p
## Age 1.58457919 4.8772386 0.12297031 12.89 0.0e+00
## SeasonWet -0.37548490 0.6869561 0.13091206 -2.87 4.1e-03
## TB_stageExposed 1.48157845 4.3998852 0.21751584 6.81 9.7e-12
## TB_stageDiseased 2.30012696 9.9754488 0.26435469 8.70 0.0e+00
## SocialStatusSubordinate 0.73367572 2.0827221 0.21348281 3.44 5.9e-04
## Condition_weight -0.27935861 0.7562687 0.06611843 -4.23 2.4e-05
## Blautia_cluster 0.00471949 1.0047306 0.06993167 0.07 9.5e-01
## Bacteroides_cluster -0.02101547 0.9792038 0.09795373 -0.21 8.3e-01
## Peptoclostridium -0.04365207 0.9572870 0.08516750 -0.51 6.1e-01
## Lactococcus_cluster -0.16382231 0.8488928 0.08240857 -1.99 4.7e-02
## Bacillus_cluster 0.03871740 1.0394767 0.08558698 0.45 6.5e-01
## Clostridium_cluster -0.09186031 0.9122326 0.08657432 -1.06 2.9e-01
## Ruminococcus_UCG.014 -0.05779993 0.9438388 0.07910965 -0.73 4.7e-01
##
## Random effects
## Group Variable Std Dev Variance
## IndividID Intercept 1.064405 1.132957
sjPlot::plot_model(fit_network, axis.lim = c(0.8,11),
vline.color = "darkgrey", sort.est = T,
dot.size= 4, line.size = 1.5, show.p = T)+
theme_bw(base_size = 20)+ggtitle("")+ylab("Hazard ratio")
This chunk took 0.34 minutes
table(sample_data(meerkat_filtered)$TB_status)
##
## Unexposed Exposed Diseased
## 140 524 477
table(sample_data(meerkat_filtered)$TB_stage)
##
## Unexposed Exposed Diseased
## 421 630 90
ps_infected<-subset_samples(meerkat_filtered, TB_status !="Unexposed")
tb_df<-data.frame(table(sample_data(ps_infected)$IndividID, sample_data(ps_infected)$TB_stage))
tb_df2<-subset(tb_df, Var2 == "Diseased")
tb_df3<- subset(tb_df2, Freq>0)
id_keep<-as.character(tb_df3$Var1)
sample_data(ps_infected)$Keep<-sample_data(ps_infected)$IndividID %in% id_keep
ps_infected2<- subset_samples(ps_infected, Keep == T)
tb_df<-data.frame(table(sample_data(ps_infected)$IndividID, sample_data(ps_infected)$TB_stage))
tb_df2<-subset(tb_df, Var2 == "Unexposed")
tb_df3<- subset(tb_df2, Freq>0)
id_keep<-as.character(tb_df3$Var1)
sample_data(ps_infected2)$Keep<-sample_data(ps_infected2)$IndividID %in% id_keep
ps_infected3<- subset_samples(ps_infected2, Keep == T)
length(unique(sample_data(ps_infected3)$IndividID))
## [1] 11
table(sample_data(ps_infected3)$TB_stage)
##
## Unexposed Exposed Diseased
## 50 15 21
ggplot(sample_data(ps_infected3), aes(x = SampleDate, y = IndividID, group = IndividID))+geom_point(aes(col = TB_stage), size = 3)+geom_line()+ theme_bw()
This chunk took 0.1 minutes
sample_data(ps_infected3)$Days_exposure<- sample_data(ps_infected3)$SampleDate - sample_data(ps_infected3)$TB_exposure_date
sample_data(ps_infected3)$BeforeExposure <- sample_data(ps_infected3)$SampleDate < sample_data(ps_infected3)$TB_exposure_date
table(sample_data(ps_infected3)$BeforeExposure)
##
## FALSE TRUE
## 36 50
ggplot(sample_data(ps_infected3), aes(x = Days_exposure, y = IndividID, group = IndividID))+geom_point(aes(col = TB_stage), size = 3)+geom_line()+geom_vline(xintercept = 0, linetype = "dashed")
This chunk took 0.02 minutes
genus_ps<-tax_glom(ps_infected3, taxrank = "Genus")
#genus_ps<-tax_glom(subset_samples(ps_infected3, Storage == "FREEZEDRIED"), taxrank = "Genus")
genus_abundances_df<-data.frame(taxa_sums(genus_ps))
genus_abundances_df$ASV<-row.names(genus_abundances_df)
taxonomy<-data.frame(tax_table(genus_ps))
taxonomy$ASV<-row.names(taxonomy)
sample_data(genus_ps)<-sample_data(genus_ps)[,c( "feature.id")]
top_genera_melt<-psmelt(genus_ps)
# make a category for rare genera
top_genera_melt$Genus2<-ifelse(top_genera_melt$OTU %in% top_genera, top_genera_melt$Genus, "Rare taxa")
head(top_genera_melt)
top_genera_melt2<-top_genera_melt %>% group_by(Sample, Genus2) %>% summarize(Abundance2 = sum(Abundance))
metadata<-data.frame(sample_data(ps_infected3))
top_genera_melt3<- merge(top_genera_melt2, metadata, by.x = "Sample", by.y = "feature.id")
This chunk took 0.05 minutes
TB_estimates<-list()
Rainfall_estimates<-list()
taxanames<-unique(top_genera_melt3$Genus2)
for (i in 1:length(taxanames)){
tryCatch({ #catch errors
print(i)
taxa1<-subset(top_genera_melt3, Genus2 == taxanames[i])
# taxa1<-subset(top_genera_melt3, Genus2 == "Bacteroides")
metadata<-taxa1
#scale variables
metadata$TotalAbundance<-as.numeric(scale(log10(metadata$TotalAbundance)))
metadata$Rainfall<-as.numeric(scale(log10(metadata$sum_rainfall_month+1)))
metadata$TB_stage<-factor(metadata$TB_stage, levels = c("Unexposed", "Exposed","Diseased"))
metadata$Year<-factor(metadata$Year)
# fit gamm
m_taxa <- mgcv::bam(log10(Abundance2+1)~
s(HoursAfterSunrise, bs = "cr")+
s(TotalAbundance, bs = "cr")+
TB_stage+
Rainfall+
s(IndividID, bs = "re"),
#correlation = corARMA(form = ~ 1|Year, p = 3),
data=metadata,
family = gaussian)
summary<-summary(m_taxa)
## TB estimates
TB_estimates_df<-data.frame(summary$p.table)[1:3,]
TB_estimates_df$Level<-c("Unexposed", "Exposed", "Diseased")
names(TB_estimates_df)[4]<-"P_val"
TB_estimates_df$Estimate2<-ifelse(TB_estimates_df$Level == "Unexposed", 0, TB_estimates_df$Estimate)
TB_estimates_df$lower<-NA
TB_estimates_df$upper<-NA
# confidence intervals exposed
beta <- coef(m_taxa)
Vb <- vcov(m_taxa, unconditional = TRUE)
se <- sqrt(diag(Vb))
j <- which(names(beta) == "(Intercept)")
cis<- beta[j] + (c(-1,1) * (2 * se[j]))
TB_estimates_df[1,7]<-cis[1]
TB_estimates_df[1,8]<-cis[2]
# confidence intervals exposed
j <- which(names(beta) == "TB_stageExposed")
cis<- beta[j] + (c(-1,1) * (2 * se[j]))
TB_estimates_df[2,7]<-cis[1]
TB_estimates_df[2,8]<-cis[2]
# confidence intervals symptomatic
j <- which(names(beta) == "TB_stageDiseased")
cis<- beta[j] + (c(-1,1) * (2 * se[j]))
TB_estimates_df[3,7]<-cis[1]
TB_estimates_df[3,8]<-cis[2]
## add coefficient
TB_estimates_df$Coef <-as.numeric(coef(m_taxa)[1:3])
TB_estimates_df$Taxa <-taxanames[i]
TB_estimates[[i]]<-TB_estimates_df
## extract rainfall stats
rainfall_estimates<- data.frame(summary$p.table)[4,]
rainfall_estimates$Taxa<-taxanames[i]
Rainfall_estimates[[i]]<-rainfall_estimates
}, error=function(e){})
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
## [1] 11
## [1] 12
## [1] 13
## [1] 14
## [1] 15
## [1] 16
## [1] 17
## [1] 18
## [1] 19
## [1] 20
## [1] 21
## [1] 22
## [1] 23
## [1] 24
## [1] 25
## [1] 26
## [1] 27
This chunk took 0.03 minutes
names(TB_estimates)<-taxanames
TB_estimates_df<-do.call(rbind, TB_estimates)
TB_estimates_df$Significant <-TB_estimates_df$P_val < 0.05
# edit significance (intercept will always be significant)
sig_taxa<-subset(TB_estimates_df, Significant == T & Level !="Unexposed")
sig_taxa<-unique(sig_taxa$Taxa)
TB_estimates_df$Significant<- TB_estimates_df$Taxa %in% sig_taxa
## edit confidence intervals
TB_estimates_df$Level<-factor(TB_estimates_df$Level, levels = c("Diseased", "Exposed", "Unexposed"))
TB_estimates_df$lower<- ifelse(TB_estimates_df$Level == "Unexposed", TB_estimates_df$lower-TB_estimates_df$Estimate, TB_estimates_df$lower)
TB_estimates_df$upper<- ifelse(TB_estimates_df$Level == "Unexposed", TB_estimates_df$upper-TB_estimates_df$Estimate, TB_estimates_df$upper)
# order taxa
TB_estimates_df$Taxa<-factor(TB_estimates_df$Taxa, levels = taxa_order)
######
#############
#############
#############
plot_tb_2<-ggplot(subset(TB_estimates_df, Taxa!= "uncultured"), aes( x = Estimate2, y = Taxa, group = Level))+
theme_bw(base_size = 14)+
geom_vline(xintercept = 0, linetype = "dashed")+
geom_errorbarh(aes(col = Level, xmin = Estimate2- Std..Error, xmax = Estimate2+ Std..Error), height = 0, size = 1, position=position_dodge(width=0.5))+
geom_errorbarh(aes(alpha = Significant, xmin = Estimate2- Std..Error, xmax = Estimate2+ Std..Error), height = 0, size = 1, col = "grey", position=position_dodge(width=0.5))+
scale_alpha_discrete(range = c(1,0), guide = "none")+
geom_point(aes(col = Level), size = 2, position=position_dodge(width=0.5))+
scale_color_manual(values = c( "brown2", "skyblue","forestgreen"), guide = guide_legend(reverse = TRUE))+
xlim(c(-0.9,NA))+
theme(axis.text.y = element_blank(), axis.title.y = element_blank())+
xlab("Effect size")+
labs(col = "TB stage")+
theme(plot.margin=unit(c(1,0.2,0.2,0.2),"cm"))+
# legend
theme( legend.background = element_blank(),
legend.box.background = element_rect(colour = "grey", fill = "white"))+
theme(legend.position=c(0.2,0.95), legend.direction="vertical",
legend.key.height = unit(0.05, 'cm'),
legend.key.width = unit(0.02, 'cm'),
legend.title=element_text(size=8),
legend.text =element_text(size=8))+
guides(colour=guide_legend(title.position="top", title.hjust =0.5,reverse = TRUE))+
coord_cartesian(xlim = c(-1, 1))+
scale_x_continuous(breaks = c(-0.5, 0, 0.5))+
theme(legend.margin=margin(t = c(0.1,0.1,0.1,0.1), unit='cm'))
This chunk took 0 minutes
names(Rainfall_estimates)<-taxanames
rainfall_estimates_df<-do.call(rbind, Rainfall_estimates)
rainfall_estimates_df$Significant <-rainfall_estimates_df$Pr...t.. < 0.05
rainfall_estimates_df$Taxa<-factor(rainfall_estimates_df$Taxa, levels = taxa_order)
plot_rainfall<-
ggplot(subset(rainfall_estimates_df, Taxa!= "uncultured"), aes( x = Estimate, y = Taxa))+
theme_bw(base_size = 14)+
geom_point( aes(col = Significant), size = 3)+
geom_errorbarh(aes(col = Significant, xmin = Estimate- Std..Error, xmax = Estimate+ Std..Error), height = 0, size = 1)+
geom_vline(xintercept = 0, linetype = "dashed")+
scale_color_manual(values = c("grey", "blue"))+
theme(axis.text.y = element_blank(), axis.title.y = element_blank())+
xlab("Effect size")+
# xlim(c(-0.004,NA))+
#legend
theme( legend.background = element_blank(),
legend.box.background = element_rect(colour = "grey", fill = "white"))+
theme(legend.position=c(0.8,0.95), legend.direction="vertical",
legend.key.height = unit(0.02, 'cm'),
legend.key.width = unit(0.02, 'cm'),
legend.title=element_text(size=8),
legend.text =element_text(size=6))+
guides(colour=guide_legend(title.position="top", reverse = T,
title.hjust =0.5))+
coord_cartesian(xlim = c(-1, 1))+
scale_x_continuous(breaks = c(-0.5, 0, 0.5))
This chunk took 0 minutes
# Combining plots
rm_legend <- function(p){p + theme(legend.position = "none")}
ggarrange(rm_legend(plot_tb_1), plot_tb_2, plot_rainfall, nrow = 1, align = "h", widths = c(2.2,1,1), labels = c("a) Year", "b) TB", "c) Rainfall"), label.x = c(0.35,-0.05,-0.15))
#
This chunk took 0.07 minutes